Spaces:

prithivMLmods
/

Multimodal-Edge-Node

Running on Zero

App Files Files Community

prithivMLmods commited on Apr 29

Commit

dd72372

verified ·

1 Parent(s): 3ff2b9e

Delete ultrashape

Browse files

Files changed (44) hide show

ultrashape/__init__.py +0 -17
ultrashape/data/objaverse_dit.py +0 -331
ultrashape/data/objaverse_vae.py +0 -262
ultrashape/data/utils.py +0 -193
ultrashape/models/__init__.py +0 -27
ultrashape/models/autoencoders/__init__.py +0 -21
ultrashape/models/autoencoders/attention_blocks.py +0 -711
ultrashape/models/autoencoders/attention_processors.py +0 -103
ultrashape/models/autoencoders/model.py +0 -377
ultrashape/models/autoencoders/surface_extractors.py +0 -266
ultrashape/models/autoencoders/vae_trainer.py +0 -229
ultrashape/models/autoencoders/volume_decoders.py +0 -440
ultrashape/models/conditioner_mask.py +0 -337
ultrashape/models/denoisers/__init__.py +0 -22
ultrashape/models/denoisers/dit_mask.py +0 -725
ultrashape/models/denoisers/moe_layers.py +0 -177
ultrashape/models/diffusion/flow_matching_dit_trainer.py +0 -313
ultrashape/models/diffusion/transport/__init__.py +0 -97
ultrashape/models/diffusion/transport/integrators.py +0 -142
ultrashape/models/diffusion/transport/path.py +0 -220
ultrashape/models/diffusion/transport/transport.py +0 -534
ultrashape/models/diffusion/transport/utils.py +0 -54
ultrashape/pipelines.py +0 -797
ultrashape/postprocessors.py +0 -209
ultrashape/preprocessors.py +0 -167
ultrashape/rembg.py +0 -32
ultrashape/schedulers.py +0 -480
ultrashape/surface_loaders.py +0 -233
ultrashape/utils/__init__.py +0 -6
ultrashape/utils/ema.py +0 -76
ultrashape/utils/misc.py +0 -200
ultrashape/utils/trainings/__init__.py +0 -1
ultrashape/utils/trainings/callback.py +0 -213
ultrashape/utils/trainings/lr_scheduler.py +0 -53
ultrashape/utils/trainings/mesh.py +0 -128
ultrashape/utils/trainings/mesh_log_callback.py +0 -342
ultrashape/utils/trainings/peft.py +0 -78
ultrashape/utils/typing.py +0 -41
ultrashape/utils/utils.py +0 -128
ultrashape/utils/visualizers/__init__.py +0 -1
ultrashape/utils/visualizers/color_util.py +0 -57
ultrashape/utils/visualizers/html_util.py +0 -64
ultrashape/utils/visualizers/pythreejs_viewer.py +0 -549
ultrashape/utils/voxelize.py +0 -74

ultrashape/__init__.py DELETED Viewed

@@ -1,17 +0,0 @@
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-from .pipelines import UltraShapePipeline
-from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover, MeshSimplifier
-from .preprocessors import ImageProcessorV2, IMAGE_PROCESSORS, DEFAULT_IMAGEPROCESSOR

ultrashape/data/objaverse_dit.py DELETED Viewed

@@ -1,331 +0,0 @@
-# -*- coding: utf-8 -*-
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import math
-import os
-import json
-from dataclasses import dataclass, field
-import random
-import imageio
-import numpy as np
-import pytorch_lightning as pl
-import torch
-import torch.nn.functional as F
-from torch.utils.data import DataLoader, Dataset
-from PIL import Image
-import pickle
-from ultrashape.utils.typing import *
-import pandas as pd
-import cv2
-import torchvision.transforms as transforms
-from pytorch_lightning.utilities import rank_zero_info
-def padding(image, mask, center=True, padding_ratio_range=[1.15, 1.15]):
-    """
-    Pad the input image and mask to a square shape with padding ratio.
-    Args:
-        image (np.ndarray): Input image array of shape (H, W, C).
-        mask (np.ndarray): Corresponding mask array of shape (H, W).
-        center (bool): Whether to center the original image in the padded output.
-        padding_ratio_range (list): Range [min, max] to randomly select padding ratio.
-    Returns:
-        newimg (np.ndarray): Padded image of shape (resize_side, resize_side, 3).
-        newmask (np.ndarray): Padded mask of shape (resize_side, resize_side).
-    """
-    h, w = image.shape[:2]
-    max_side = max(h, w)
-    # Select padding ratio either fixed or randomly within the given range
-    if padding_ratio_range[0] == padding_ratio_range[1]:
-        padding_ratio = padding_ratio_range[0]
-    else:
-        padding_ratio = random.uniform(padding_ratio_range[0], padding_ratio_range[1])
-    resize_side = int(max_side * padding_ratio)
-    pad_h = resize_side - h
-    pad_w = resize_side - w
-    if center:
-        start_h = pad_h // 2
-    else:
-        start_h = pad_h - resize_side // 20
-    start_w = pad_w // 2
-    # Create new white image and black mask with padded size
-    newimg = np.ones((resize_side, resize_side, 3), dtype=np.uint8) * 255
-    newmask = np.zeros((resize_side, resize_side), dtype=np.uint8)
-    # Place original image and mask into the padded canvas
-    newimg[start_h:start_h + h, start_w:start_w + w] = image
-    newmask[start_h:start_h + h, start_w:start_w + w] = mask
-    return newimg, newmask
-class ObjaverseDataset(Dataset):
-    def __init__(
-        self,
-        data_json,
-        sample_root,
-        image_path,
-        image_transform = None,
-        pc_size: int = 2048,
-        pc_sharpedge_size: int = 2048,
-        sharpedge_label: bool = False,
-        return_normal: bool = False,
-        padding = True,
-        padding_ratio_range=[1.15, 1.15],
-    ):
-        super().__init__()
-        self.uids = json.load(open(data_json))
-        self.sample_root = sample_root
-        self.image_paths = json.load(open(image_path))
-        self.image_transform = image_transform
-        self.pc_size = pc_size
-        self.pc_sharpedge_size = pc_sharpedge_size
-        self.sharpedge_label = sharpedge_label
-        self.return_normal = return_normal
-        self.padding = padding
-        self.padding_ratio_range = padding_ratio_range
-        print(f"Loaded {len(self.uids)} uids from {data_json}.")
-        rank_zero_info(f'*' * 50)
-        rank_zero_info(f'Dataset Infos:')
-        rank_zero_info(f'# of 3D file: {len(self.uids)}')
-        rank_zero_info(f'# of Surface Points: {self.pc_size}')
-        rank_zero_info(f'# of Sharpedge Surface Points: {self.pc_sharpedge_size}')
-        rank_zero_info(f'Using sharp edge label: {self.sharpedge_label}')
-        rank_zero_info(f'*' * 50)
-    def __len__(self):
-        return len(self.uids)
-    def _load_shape(self, index: int) -> Dict[str, Any]:
-        data = np.load(f'{self.sample_root}/{self.uids[index]}.npz')
-        surface_og = (np.asarray(data['clean_surface_points'])-0.5) * 2
-        normal = np.asarray(data['clean_surface_normals'])
-        surface_og_n = np.concatenate([surface_og, normal], axis=1)
-        rng = np.random.default_rng()
-        # hard code: first 300k are uniform, last 300k are sharp
-        assert surface_og_n.shape[0] == 600000, f"assume that suface points = 30w uniform + 30w curvature, but {len(surface_og_n)=}"
-        coarse_surface = surface_og_n[:300000]
-        sharp_surface = surface_og_n[300000:]
-        surface_normal = []
-        rng = np.random.default_rng()
-        if self.pc_size > 0:
-            ind = rng.choice(coarse_surface.shape[0], self.pc_size // 2, replace=False)
-            coarse_surface = coarse_surface[ind]
-            if self.sharpedge_label:
-                sharpedge_label = np.zeros((self.pc_size // 2, 1))
-                coarse_surface = np.concatenate((coarse_surface, sharpedge_label), axis=1)
-            surface_normal.append(coarse_surface)
-            ind_sharpedge = rng.choice(sharp_surface.shape[0], self.pc_size // 2, replace=False)
-            sharp_surface = sharp_surface[ind_sharpedge]
-            if self.sharpedge_label:
-                sharpedge_label = np.ones((self.pc_size // 2, 1))
-                sharp_surface = np.concatenate((sharp_surface, sharpedge_label), axis=1)
-            surface_normal.append(sharp_surface)
-        surface_normal = np.concatenate(surface_normal, axis=0)
-        surface_normal = torch.FloatTensor(surface_normal)
-        surface = surface_normal[:, 0:3]
-        normal = surface_normal[:, 3:6]
-        assert surface.shape[0] == self.pc_size + self.pc_sharpedge_size
-        geo_points = 0.0
-        normal = torch.nn.functional.normalize(normal, p=2, dim=1)
-        if self.return_normal:
-            surface = torch.cat([surface, normal], dim=-1)
-        if self.sharpedge_label:
-            surface = torch.cat([surface, surface_normal[:, -1:]], dim=-1)
-        ret = {
-                "uid": self.uids[index],
-                "surface": surface,
-                "geo_points": geo_points
-            }
-        return ret
-    def _load_image(self, index: int) -> Dict[str, Any]:
-        ret = {}
-        sel_idx = random.randint(0, 15)
-        ret["sel_image_idx"] = sel_idx
-        obj_name = self.uids[index]
-        img_path = f'{self.image_paths[obj_name]}/{os.path.basename(self.image_paths[obj_name])}/rgba/' + f"{sel_idx:03d}.png"
-        images, masks = [], []
-        image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
-        assert image.shape[2] == 4
-        alpha = image[:, :, 3:4].astype(np.float32) / 255
-        forground = image[:, :, :3]
-        background = np.ones_like(forground) * 255
-        img_new = forground * alpha + background * (1 - alpha)
-        image = img_new.astype(np.uint8)
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        mask = (alpha[:, :, 0] * 255).astype(np.uint8)
-        if self.padding:
-            h, w = image.shape[:2]
-            binary = mask > 0.3
-            non_zero_coords = np.argwhere(binary)
-            x_min, y_min = non_zero_coords.min(axis=0)
-            x_max, y_max = non_zero_coords.max(axis=0)
-            image, mask = padding(
-                image[max(x_min - 5, 0):min(x_max + 5, h), max(y_min - 5, 0):min(y_max + 5, w)],
-                mask[max(x_min - 5, 0):min(x_max + 5, h), max(y_min - 5, 0):min(y_max + 5, w)],
-                center=True, padding_ratio_range=self.padding_ratio_range)
-        if self.image_transform:
-            image = self.image_transform(image)
-            mask = np.stack((mask, mask, mask), axis=-1)
-            mask = self.image_transform(mask)
-        images.append(image)
-        masks.append(mask)
-        ret["image"] = torch.cat(images, dim=0)
-        ret["mask"] = torch.cat(masks, dim=0)[:1, ...]
-        return ret
-    def get_data(self, index):
-        ret = self._load_shape(index)
-        ret.update(self._load_image(index))
-        return ret
-    def __getitem__(self, index):
-        try:
-            return self.get_data(index)
-        except Exception as e:
-            print(f"Error in {self.uids[index]}: {e}")
-            return self.__getitem__(np.random.randint(len(self)))
-    def collate(self, batch):
-        batch = torch.utils.data.default_collate(batch)
-        return batch
-class ObjaverseDataModule(pl.LightningDataModule):
-    def __init__(
-        self,
-        batch_size: int = 1,
-        num_workers: int = 4,
-        val_num_workers: int = 2,
-        training_data_list: str = None,
-        sample_pcd_dir: str = None,
-        image_data_json: str = None,
-        image_size: int = 224,
-        mean: Union[List[float], Tuple[float]] = (0.485, 0.456, 0.406),
-        std: Union[List[float], Tuple[float]] = (0.229, 0.224, 0.225),
-        pc_size: int = 2048,
-        pc_sharpedge_size: int = 2048,
-        sharpedge_label: bool = False,
-        return_normal: bool = False,
-        padding = True,
-        padding_ratio_range=[1.15, 1.15]
-    ):
-        super().__init__()
-        self.batch_size = batch_size
-        self.num_workers = num_workers
-        self.val_num_workers = val_num_workers
-        self.training_data_list = training_data_list
-        self.sample_pcd_dir = sample_pcd_dir
-        self.image_data_json = image_data_json
-        self.image_size = image_size
-        self.mean = mean
-        self.std = std
-        self.train_image_transform = transforms.Compose([
-            transforms.ToTensor(),
-            transforms.Resize(self.image_size),
-            transforms.Normalize(mean=self.mean, std=self.std)])
-        self.val_image_transform = transforms.Compose([
-            transforms.ToTensor(),
-            transforms.Resize(self.image_size),
-            transforms.Normalize(mean=self.mean, std=self.std)])
-        self.pc_size = pc_size
-        self.pc_sharpedge_size = pc_sharpedge_size
-        self.sharpedge_label = sharpedge_label
-        self.return_normal = return_normal
-        self.padding = padding
-        self.padding_ratio_range = padding_ratio_range
-    def train_dataloader(self):
-        asl_params = {
-            "data_json": f'{self.training_data_list}/train.json',
-            "sample_root": self.sample_pcd_dir,
-            "image_path": self.image_data_json,
-            "image_transform": self.train_image_transform,
-            "pc_size": self.pc_size,
-            "pc_sharpedge_size": self.pc_sharpedge_size,
-            "sharpedge_label": self.sharpedge_label,
-            "return_normal": self.return_normal,
-            "padding": self.padding,
-            "padding_ratio_range": self.padding_ratio_range,
-        }
-        dataset = ObjaverseDataset(**asl_params)
-        return torch.utils.data.DataLoader(
-            dataset,
-            batch_size=self.batch_size,
-            num_workers=self.num_workers,
-            pin_memory=True,
-            drop_last=True,
-        )
-    def val_dataloader(self):
-        asl_params = {
-            "data_json": f'{self.training_data_list}/val.json',
-            "sample_root": self.sample_pcd_dir,
-            "image_path": self.image_data_json,
-            "image_transform": self.val_image_transform,
-            "pc_size": self.pc_size,
-            "pc_sharpedge_size": self.pc_sharpedge_size,
-            "sharpedge_label": self.sharpedge_label,
-            "return_normal": self.return_normal,
-            "padding": self.padding,
-            "padding_ratio_range": self.padding_ratio_range,
-        }
-        dataset = ObjaverseDataset(**asl_params)
-        return torch.utils.data.DataLoader(
-            dataset,
-            batch_size=self.batch_size,
-            num_workers=self.val_num_workers,
-            pin_memory=True,
-            drop_last=True,
-        )

ultrashape/data/objaverse_vae.py DELETED Viewed

@@ -1,262 +0,0 @@
-# -*- coding: utf-8 -*-
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import os
-import cv2
-import json
-import math
-import random
-import imageio
-import pickle
-import numpy as np
-from PIL import Image
-import pandas as pd
-from dataclasses import dataclass, field
-import torch
-import torch.nn.functional as F
-import pytorch_lightning as pl
-from torch.utils.data import DataLoader, Dataset
-import torchvision.transforms as transforms
-from pytorch_lightning.utilities import rank_zero_info
-from ultrashape.utils.typing import *
-class ObjaverseDataset(Dataset):
-    def __init__(
-        self,
-        data_json,
-        sample_root,
-        pc_size: int = 2048,
-        pc_sharpedge_size: int = 2048,
-        sup_near_uni_size: int = 4096,
-        sup_near_sharp_size: int = 4096,
-        sup_space_size: int = 4096,
-        tsdf_threshold: float = 0.05,
-        sharpedge_label: bool = False,
-        return_normal: bool = False,
-    ):
-        super().__init__()
-        self.uids = json.load(open(data_json))
-        self.sample_root = sample_root
-        self.pc_size = pc_size
-        self.pc_sharpedge_size = pc_sharpedge_size
-        self.sharpedge_label = sharpedge_label
-        self.return_normal = return_normal
-        self.sup_near_uni_size = sup_near_uni_size
-        self.sup_near_sharp_size = sup_near_sharp_size
-        self.sup_space_size = sup_space_size
-        self.tsdf_threshold = tsdf_threshold
-        print(f"Loaded {len(self.uids)} uids from {data_json}.")
-        rank_zero_info(f'*' * 50)
-        rank_zero_info(f'Dataset Infos:')
-        rank_zero_info(f'# of 3D file: {len(self.uids)}')
-        rank_zero_info(f'# of Surface Points: {self.pc_size}')
-        rank_zero_info(f'# of Sharpedge Surface Points: {self.pc_sharpedge_size}')
-        rank_zero_info(f'# of Uniform Near-Surface Sup-Points: {self.sup_near_uni_size}')
-        rank_zero_info(f'# of Sharpedge Near-Surface Sup-Points: {self.sup_near_sharp_size}')
-        rank_zero_info(f'# of Random Space Sup-Points: {self.sup_space_size}')
-        rank_zero_info(f'Using sharp edge label: {self.sharpedge_label}')
-        rank_zero_info(f'*' * 50)
-    def __len__(self):
-        return len(self.uids)
-    def _load_shape(self, index: int) -> Dict[str, Any]:
-        rng = np.random.default_rng()
-        data = np.load(f'{self.sample_root}/{self.uids[index]}.npz')
-        ##################### sup pcd&sdf ######################
-        uniform_near_points =  (np.asarray(data['uniform_near_points'])-0.5) * 2
-        curvature_near_points = (np.asarray(data['curvature_near_points'])-0.5) * 2
-        space_points = (np.asarray(data['space_points'])-0.5) * 2
-        uniform_near_sdf = np.asarray(data['uniform_near_sdf']) * 2
-        curvature_near_sdf = np.asarray(data['curvature_near_sdf']) * 2
-        space_sdf = np.asarray(data['space_sdf']) * 2
-        uni_noisy_idx = rng.choice(uniform_near_points.shape[0], self.sup_near_uni_size, replace=False)
-        cur_noisy_idx = rng.choice(curvature_near_points.shape[0], self.sup_near_sharp_size, replace=False)
-        space_idx = rng.choice(space_points.shape[0], self.sup_space_size, replace=False)
-        uniform_near_points = uniform_near_points[uni_noisy_idx]
-        curvature_near_points = curvature_near_points[cur_noisy_idx]
-        space_points = space_points[space_idx]
-        uniform_near_sdf = uniform_near_sdf[uni_noisy_idx]
-        curvature_near_sdf = curvature_near_sdf[cur_noisy_idx]
-        space_sdf = space_sdf[space_idx]
-        uniform_near_sdf, curvature_near_sdf, space_sdf = map(self._clip_to_tsdf, (uniform_near_sdf, curvature_near_sdf, space_sdf))
-        surface_og = (np.asarray(data['clean_surface_points'])-0.5) * 2
-        normal = np.asarray(data['clean_surface_normals'])
-        surface_og_n = np.concatenate([surface_og, normal], axis=1)
-        rng = np.random.default_rng()
-        # hard code: first 300k are uniform, last 300k are sharp
-        assert surface_og_n.shape[0] == 600000, f"assume that suface points = 30w uniform + 30w curvature, but {len(surface_og_n)=}"
-        coarse_surface = surface_og_n[:300000]
-        sharp_surface = surface_og_n[300000:]
-        surface_normal = []
-        if self.pc_size > 0:
-            ind = rng.choice(coarse_surface.shape[0], self.pc_size // 2, replace=False)
-            coarse_surface = coarse_surface[ind]
-            if self.sharpedge_label:
-                sharpedge_label = np.zeros((self.pc_size // 2, 1))
-                coarse_surface = np.concatenate((coarse_surface, sharpedge_label), axis=1)
-            surface_normal.append(coarse_surface)
-            ind_sharpedge = rng.choice(sharp_surface.shape[0], self.pc_size // 2, replace=False)
-            sharp_surface = sharp_surface[ind_sharpedge]
-            if self.sharpedge_label:
-                sharpedge_label = np.ones((self.pc_size // 2, 1))
-                sharp_surface = np.concatenate((sharp_surface, sharpedge_label), axis=1)
-            surface_normal.append(sharp_surface)
-        surface_normal = np.concatenate(surface_normal, axis=0)
-        surface_normal = torch.FloatTensor(surface_normal)
-        surface = surface_normal[:, 0:3]
-        normal = surface_normal[:, 3:6]
-        assert surface.shape[0] == self.pc_size + self.pc_sharpedge_size
-        geo_points = 0.0
-        normal = torch.nn.functional.normalize(normal, p=2, dim=1)
-        if self.return_normal:
-            surface = torch.cat([surface, normal], dim=-1)
-        if self.sharpedge_label:
-            surface = torch.cat([surface, surface_normal[:, -1:]], dim=-1)
-        ret = {
-                "uid": self.uids[index],
-                "surface": surface,
-                "sup_near_uniform": np.concatenate([uniform_near_points, uniform_near_sdf[...,None]], axis=1),
-                "sup_near_sharp": np.concatenate([curvature_near_points, curvature_near_sdf[...,None]], axis=1),
-                "sup_space": np.concatenate([space_points, space_sdf[...,None]], axis=1),
-                "geo_points": geo_points
-            }
-        return ret
-    def _clip_to_tsdf(self, sdf: np.array):
-        nan_mask = np.isnan(sdf)
-        if np.any(nan_mask):
-            sdf=np.nan_to_num(sdf, nan=1.0, posinf=1.0, neginf=-1.0)
-        return sdf.flatten().astype(np.float32).clip(-self.tsdf_threshold, self.tsdf_threshold) / self.tsdf_threshold
-    def get_data(self, index):
-        ret = self._load_shape(index)
-        return ret
-    def __getitem__(self, index):
-        return self.get_data(index)
-    def collate(self, batch):
-        batch = torch.utils.data.default_collate(batch)
-        return batch
-class ObjaverseDataModule(pl.LightningDataModule):
-    def __init__(
-        self,
-        batch_size: int = 1,
-        num_workers: int = 4,
-        val_num_workers: int = 2,
-        training_data_list: str = None,
-        sample_pcd_dir: str = None,
-        pc_size: int = 2048,
-        pc_sharpedge_size: int = 2048,
-        sup_near_uni_size: int = 4096,
-        sup_near_sharp_size: int = 4096,
-        sup_space_size: int = 4096,
-        tsdf_threshold: float = 0.05,
-        sharpedge_label: bool = False,
-        return_normal: bool = False,
-    ):
-        super().__init__()
-        self.batch_size = batch_size
-        self.num_workers = num_workers
-        self.val_num_workers = val_num_workers
-        self.training_data_list = training_data_list
-        self.sample_pcd_dir = sample_pcd_dir
-        self.pc_size = pc_size
-        self.pc_sharpedge_size = pc_sharpedge_size
-        self.sharpedge_label = sharpedge_label
-        self.return_normal = return_normal
-        self.sup_near_uni_size = sup_near_uni_size
-        self.sup_near_sharp_size = sup_near_sharp_size
-        self.sup_space_size = sup_space_size
-        self.tsdf_threshold = tsdf_threshold
-    def train_dataloader(self):
-        asl_params = {
-            "data_json": f'{self.training_data_list}/train.json',
-            "sample_root": self.sample_pcd_dir,
-            "pc_size": self.pc_size,
-            "pc_sharpedge_size": self.pc_sharpedge_size,
-            "sup_near_uni_size": self.sup_near_uni_size,
-            "sup_near_sharp_size": self.sup_near_sharp_size,
-            "sup_space_size": self.sup_space_size,
-            "tsdf_threshold": self.tsdf_threshold,
-            "sharpedge_label": self.sharpedge_label,
-            "return_normal": self.return_normal,
-        }
-        dataset = ObjaverseDataset(**asl_params)
-        return torch.utils.data.DataLoader(
-            dataset,
-            batch_size=self.batch_size,
-            num_workers=self.num_workers,
-            pin_memory=True,
-            drop_last=True,
-        )
-    def val_dataloader(self):
-        asl_params = {
-            "data_json": f'{self.training_data_list}/val.json',
-            "sample_root": self.sample_pcd_dir,
-            "pc_size": self.pc_size,
-            "pc_sharpedge_size": self.pc_sharpedge_size,
-            "sup_near_uni_size": self.sup_near_uni_size,
-            "sup_near_sharp_size": self.sup_near_sharp_size,
-            "sup_space_size": self.sup_space_size,
-            "tsdf_threshold": self.tsdf_threshold,
-            "sharpedge_label": self.sharpedge_label,
-            "return_normal": self.return_normal,
-        }
-        dataset = ObjaverseDataset(**asl_params)
-        return torch.utils.data.DataLoader(
-            dataset,
-            batch_size=self.batch_size,
-            num_workers=self.val_num_workers,
-            pin_memory=True,
-            drop_last=True,
-        )

ultrashape/data/utils.py DELETED Viewed

@@ -1,193 +0,0 @@
-# -*- coding: utf-8 -*-
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
-# This file is part of the WebDataset library.
-# See the LICENSE file for licensing terms (BSD-style).
-"""Miscellaneous utility functions."""
-import importlib
-import itertools as itt
-import os
-import re
-import sys
-from typing import Any, Callable, Iterator, Union
-import torch
-import numpy as np
-def make_seed(*args):
-    seed = 0
-    for arg in args:
-        seed = (seed * 31 + hash(arg)) & 0x7FFFFFFF
-    return seed
-class PipelineStage:
-    def invoke(self, *args, **kw):
-        raise NotImplementedError
-def identity(x: Any) -> Any:
-    """Return the argument as is."""
-    return x
-def safe_eval(s: str, expr: str = "{}"):
-    """Evaluate the given expression more safely."""
-    if re.sub("[^A-Za-z0-9_]", "", s) != s:
-        raise ValueError(f"safe_eval: illegal characters in: '{s}'")
-    return eval(expr.format(s))
-def lookup_sym(sym: str, modules: list):
-    """Look up a symbol in a list of modules."""
-    for mname in modules:
-        module = importlib.import_module(mname, package="webdataset")
-        result = getattr(module, sym, None)
-        if result is not None:
-            return result
-    return None
-def repeatedly0(
-    loader: Iterator, nepochs: int = sys.maxsize, nbatches: int = sys.maxsize
-):
-    """Repeatedly returns batches from a DataLoader."""
-    for _ in range(nepochs):
-        yield from itt.islice(loader, nbatches)
-def guess_batchsize(batch: Union[tuple, list]):
-    """Guess the batch size by looking at the length of the first element in a tuple."""
-    return len(batch[0])
-def repeatedly(
-    source: Iterator,
-    nepochs: int = None,
-    nbatches: int = None,
-    nsamples: int = None,
-    batchsize: Callable[..., int] = guess_batchsize,
-):
-    """Repeatedly yield samples from an iterator."""
-    epoch = 0
-    batch = 0
-    total = 0
-    while True:
-        for sample in source:
-            yield sample
-            batch += 1
-            if nbatches is not None and batch >= nbatches:
-                return
-            if nsamples is not None:
-                total += guess_batchsize(sample)
-                if total >= nsamples:
-                    return
-        epoch += 1
-        if nepochs is not None and epoch >= nepochs:
-            return
-def pytorch_worker_info(group=None):  # sourcery skip: use-contextlib-suppress
-    """Return node and worker info for PyTorch and some distributed environments."""
-    rank = 0
-    world_size = 1
-    worker = 0
-    num_workers = 1
-    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
-        rank = int(os.environ["RANK"])
-        world_size = int(os.environ["WORLD_SIZE"])
-    else:
-        try:
-            import torch.distributed
-            if torch.distributed.is_available() and torch.distributed.is_initialized():
-                group = group or torch.distributed.group.WORLD
-                rank = torch.distributed.get_rank(group=group)
-                world_size = torch.distributed.get_world_size(group=group)
-        except ModuleNotFoundError:
-            pass
-    if "WORKER" in os.environ and "NUM_WORKERS" in os.environ:
-        worker = int(os.environ["WORKER"])
-        num_workers = int(os.environ["NUM_WORKERS"])
-    else:
-        try:
-            import torch.utils.data
-            worker_info = torch.utils.data.get_worker_info()
-            if worker_info is not None:
-                worker = worker_info.id
-                num_workers = worker_info.num_workers
-        except ModuleNotFoundError:
-            pass
-    return rank, world_size, worker, num_workers
-def pytorch_worker_seed(group=None):
-    """Compute a distinct, deterministic RNG seed for each worker and node."""
-    rank, world_size, worker, num_workers = pytorch_worker_info(group=group)
-    return rank * 1000 + worker
-def worker_init_fn(_):
-    worker_info = torch.utils.data.get_worker_info()
-    worker_id = worker_info.id
-    # dataset = worker_info.dataset
-    # split_size = dataset.num_records // worker_info.num_workers
-    # # reset num_records to the true number to retain reliable length information
-    # dataset.sample_ids = dataset.valid_ids[worker_id * split_size:(worker_id + 1) * split_size]
-    # current_id = np.random.choice(len(np.random.get_state()[1]), 1)
-    # return np.random.seed(np.random.get_state()[1][current_id] + worker_id)
-    return np.random.seed(np.random.get_state()[1][0] + worker_id)
-def collation_fn(samples, combine_tensors=True, combine_scalars=True):
-    """
-    Args:
-        samples (list[dict]):
-        combine_tensors:
-        combine_scalars:
-    Returns:
-    """
-    result = {}
-    keys = samples[0].keys()
-    for key in keys:
-        result[key] = []
-    for sample in samples:
-        for key in keys:
-            val = sample[key]
-            result[key].append(val)
-    for key in keys:
-        val_list = result[key]
-        if isinstance(val_list[0], (int, float)):
-            if combine_scalars:
-                result[key] = np.array(result[key])
-        elif isinstance(val_list[0], torch.Tensor):
-            if combine_tensors:
-                result[key] = torch.stack(val_list)
-        elif isinstance(val_list[0], np.ndarray):
-            if combine_tensors:
-                result[key] = np.stack(val_list)
-    return result

ultrashape/models/__init__.py DELETED Viewed

@@ -1,27 +0,0 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-# Copyright (C) 2024 THL A29 Limited, a Tencent company.  All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-from .autoencoders import ShapeVAE
-from .conditioner_mask import DualImageEncoder, SingleImageEncoder, DinoImageEncoder, CLIPImageEncoder
-from .denoisers import RefineDiT

ultrashape/models/autoencoders/__init__.py DELETED Viewed

@@ -1,21 +0,0 @@
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-from .attention_blocks import CrossAttentionDecoder
-from .attention_processors import FlashVDMCrossAttentionProcessor, CrossAttentionProcessor, \
-    FlashVDMTopMCrossAttentionProcessor
-from .model import ShapeVAE, VectsetVAE
-from .surface_extractors import SurfaceExtractors, MCSurfaceExtractor, DMCSurfaceExtractor, Latent2MeshOutput
-from .volume_decoders import HierarchicalVolumeDecoding, FlashVDMVolumeDecoding, VanillaVolumeDecoder
-from .vae_trainer import VAETrainer

ultrashape/models/autoencoders/attention_blocks.py DELETED Viewed

@@ -1,711 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-# Copyright (C) 2024 THL A29 Limited, a Tencent company.  All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import os
-from typing import Optional, Union, List
-import torch
-import torch.nn as nn
-from einops import rearrange
-from torch import Tensor
-from .attention_processors import CrossAttentionProcessor
-from ...utils import logger
-from ultrashape.utils import voxelize_from_point
-scaled_dot_product_attention = nn.functional.scaled_dot_product_attention
-if os.environ.get('USE_SAGEATTN', '0') == '1':
-    try:
-        from sageattention import sageattn
-    except ImportError:
-        raise ImportError('Please install the package "sageattention" to use this USE_SAGEATTN.')
-    scaled_dot_product_attention = sageattn
-class FourierEmbedder(nn.Module):
-    """ The sin/cosine positional embedding. """
-    def __init__(self,
-                 num_freqs: int = 6,
-                 logspace: bool = True,
-                 input_dim: int = 3,
-                 include_input: bool = True,
-                 include_pi: bool = True) -> None:
-        super().__init__()
-        if logspace:
-            frequencies = 2.0 ** torch.arange(
-                num_freqs,
-                dtype=torch.float32
-            )
-        else:
-            frequencies = torch.linspace(
-                1.0,
-                2.0 ** (num_freqs - 1),
-                num_freqs,
-                dtype=torch.float32
-            )
-        if include_pi:
-            frequencies *= torch.pi
-        self.register_buffer("frequencies", frequencies, persistent=False)
-        self.include_input = include_input
-        self.num_freqs = num_freqs
-        self.out_dim = self.get_dims(input_dim)
-    def get_dims(self, input_dim):
-        temp = 1 if self.include_input or self.num_freqs == 0 else 0
-        out_dim = input_dim * (self.num_freqs * 2 + temp)
-        return out_dim
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """ Forward process.
-        Args:
-            x: tensor of shape [..., dim]
-        Returns:
-            embedding: an embedding of `x` of shape [..., dim * (num_freqs * 2 + temp)]
-                where temp is 1 if include_input is True and 0 otherwise.
-        """
-        if self.num_freqs > 0:
-            embed = (x[..., None].contiguous() * self.frequencies).view(*x.shape[:-1], -1)
-            if self.include_input:
-                return torch.cat((x, embed.sin(), embed.cos()), dim=-1)
-            else:
-                return torch.cat((embed.sin(), embed.cos()), dim=-1)
-        else:
-            return x
-class DropPath(nn.Module):
-    """Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
-    """
-    def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True):
-        super(DropPath, self).__init__()
-        self.drop_prob = drop_prob
-        self.scale_by_keep = scale_by_keep
-    def forward(self, x):
-        """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
-        This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
-        the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
-        See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
-        changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
-        'survival rate' as the argument.
-        """
-        if self.drop_prob == 0. or not self.training:
-            return x
-        keep_prob = 1 - self.drop_prob
-        shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
-        random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
-        if keep_prob > 0.0 and self.scale_by_keep:
-            random_tensor.div_(keep_prob)
-        return x * random_tensor
-    def extra_repr(self):
-        return f'drop_prob={round(self.drop_prob, 3):0.3f}'
-class MLP(nn.Module):
-    def __init__(
-        self, *,
-        width: int,
-        expand_ratio: int = 4,
-        output_width: int = None,
-        drop_path_rate: float = 0.0
-    ):
-        super().__init__()
-        self.width = width
-        self.c_fc = nn.Linear(width, width * expand_ratio)
-        self.c_proj = nn.Linear(width * expand_ratio, output_width if output_width is not None else width)
-        self.gelu = nn.GELU()
-        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
-    def forward(self, x):
-        return self.drop_path(self.c_proj(self.gelu(self.c_fc(x))))
-class QKVMultiheadCrossAttention(nn.Module):
-    def __init__(
-        self,
-        *,
-        heads: int,
-        n_data: Optional[int] = None,
-        width=None,
-        qk_norm=False,
-        norm_layer=nn.LayerNorm
-    ):
-        super().__init__()
-        self.heads = heads
-        self.n_data = n_data
-        self.q_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
-        self.k_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
-        self.attn_processor = CrossAttentionProcessor()
-    def forward(self, q, kv):
-        _, n_ctx, _ = q.shape
-        bs, n_data, width = kv.shape
-        attn_ch = width // self.heads // 2
-        q = q.view(bs, n_ctx, self.heads, -1)
-        kv = kv.view(bs, n_data, self.heads, -1)
-        k, v = torch.split(kv, attn_ch, dim=-1)
-        q = self.q_norm(q)
-        k = self.k_norm(k)
-        q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v))
-        out = self.attn_processor(self, q, k, v)
-        out = out.transpose(1, 2).reshape(bs, n_ctx, -1)
-        return out
-class MultiheadCrossAttention(nn.Module):
-    def __init__(
-        self,
-        *,
-        width: int,
-        heads: int,
-        qkv_bias: bool = True,
-        n_data: Optional[int] = None,
-        data_width: Optional[int] = None,
-        norm_layer=nn.LayerNorm,
-        qk_norm: bool = False,
-        kv_cache: bool = False,
-    ):
-        super().__init__()
-        self.n_data = n_data
-        self.width = width
-        self.heads = heads
-        self.data_width = width if data_width is None else data_width
-        self.c_q = nn.Linear(width, width, bias=qkv_bias)
-        self.c_kv = nn.Linear(self.data_width, width * 2, bias=qkv_bias)
-        self.c_proj = nn.Linear(width, width)
-        self.attention = QKVMultiheadCrossAttention(
-            heads=heads,
-            n_data=n_data,
-            width=width,
-            norm_layer=norm_layer,
-            qk_norm=qk_norm
-        )
-        self.kv_cache = kv_cache
-        self.data = None
-    def forward(self, x, data):
-        x = self.c_q(x)
-        if self.kv_cache:
-            if self.data is None:
-                self.data = self.c_kv(data)
-                logger.info('Save kv cache,this should be called only once for one mesh')
-            data = self.data
-        else:
-            data = self.c_kv(data)
-        x = self.attention(x, data)
-        x = self.c_proj(x)
-        return x
-class ResidualCrossAttentionBlock(nn.Module):
-    def __init__(
-        self,
-        *,
-        n_data: Optional[int] = None,
-        width: int,
-        heads: int,
-        mlp_expand_ratio: int = 4,
-        data_width: Optional[int] = None,
-        qkv_bias: bool = True,
-        norm_layer=nn.LayerNorm,
-        qk_norm: bool = False
-    ):
-        super().__init__()
-        if data_width is None:
-            data_width = width
-        self.attn = MultiheadCrossAttention(
-            n_data=n_data,
-            width=width,
-            heads=heads,
-            data_width=data_width,
-            qkv_bias=qkv_bias,
-            norm_layer=norm_layer,
-            qk_norm=qk_norm
-        )
-        self.ln_1 = norm_layer(width, elementwise_affine=True, eps=1e-6)
-        self.ln_2 = norm_layer(data_width, elementwise_affine=True, eps=1e-6)
-        self.ln_3 = norm_layer(width, elementwise_affine=True, eps=1e-6)
-        self.mlp = MLP(width=width, expand_ratio=mlp_expand_ratio)
-    def forward(self, x: torch.Tensor, data: torch.Tensor):
-        x = x + self.attn(self.ln_1(x), self.ln_2(data))
-        x = x + self.mlp(self.ln_3(x))
-        return x
-class QKVMultiheadAttention(nn.Module):
-    def __init__(
-        self,
-        *,
-        heads: int,
-        n_ctx: int,
-        width=None,
-        qk_norm=False,
-        norm_layer=nn.LayerNorm
-    ):
-        super().__init__()
-        self.heads = heads
-        self.n_ctx = n_ctx
-        self.q_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
-        self.k_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
-    def forward(self, qkv):
-        bs, n_ctx, width = qkv.shape
-        attn_ch = width // self.heads // 3
-        qkv = qkv.view(bs, n_ctx, self.heads, -1)
-        q, k, v = torch.split(qkv, attn_ch, dim=-1)
-        q = self.q_norm(q)
-        k = self.k_norm(k)
-        q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v))
-        out = scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1)
-        return out
-class MultiheadAttention(nn.Module):
-    def __init__(
-        self,
-        *,
-        n_ctx: int,
-        width: int,
-        heads: int,
-        qkv_bias: bool,
-        norm_layer=nn.LayerNorm,
-        qk_norm: bool = False,
-        drop_path_rate: float = 0.0
-    ):
-        super().__init__()
-        self.n_ctx = n_ctx
-        self.width = width
-        self.heads = heads
-        self.c_qkv = nn.Linear(width, width * 3, bias=qkv_bias)
-        self.c_proj = nn.Linear(width, width)
-        self.attention = QKVMultiheadAttention(
-            heads=heads,
-            n_ctx=n_ctx,
-            width=width,
-            norm_layer=norm_layer,
-            qk_norm=qk_norm
-        )
-        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
-    def forward(self, x):
-        x = self.c_qkv(x)
-        x = self.attention(x)
-        x = self.drop_path(self.c_proj(x))
-        return x
-class ResidualAttentionBlock(nn.Module):
-    def __init__(
-        self,
-        *,
-        n_ctx: int,
-        width: int,
-        heads: int,
-        qkv_bias: bool = True,
-        norm_layer=nn.LayerNorm,
-        qk_norm: bool = False,
-        drop_path_rate: float = 0.0,
-    ):
-        super().__init__()
-        self.attn = MultiheadAttention(
-            n_ctx=n_ctx,
-            width=width,
-            heads=heads,
-            qkv_bias=qkv_bias,
-            norm_layer=norm_layer,
-            qk_norm=qk_norm,
-            drop_path_rate=drop_path_rate
-        )
-        self.ln_1 = norm_layer(width, elementwise_affine=True, eps=1e-6)
-        self.mlp = MLP(width=width, drop_path_rate=drop_path_rate)
-        self.ln_2 = norm_layer(width, elementwise_affine=True, eps=1e-6)
-    def forward(self, x: torch.Tensor):
-        x = x + self.attn(self.ln_1(x))
-        x = x + self.mlp(self.ln_2(x))
-        return x
-class Transformer(nn.Module):
-    def __init__(
-        self,
-        *,
-        n_ctx: int,
-        width: int,
-        layers: int,
-        heads: int,
-        qkv_bias: bool = True,
-        norm_layer=nn.LayerNorm,
-        qk_norm: bool = False,
-        drop_path_rate: float = 0.0
-    ):
-        super().__init__()
-        self.n_ctx = n_ctx
-        self.width = width
-        self.layers = layers
-        self.resblocks = nn.ModuleList(
-            [
-                ResidualAttentionBlock(
-                    n_ctx=n_ctx,
-                    width=width,
-                    heads=heads,
-                    qkv_bias=qkv_bias,
-                    norm_layer=norm_layer,
-                    qk_norm=qk_norm,
-                    drop_path_rate=drop_path_rate
-                )
-                for _ in range(layers)
-            ]
-        )
-    def forward(self, x: torch.Tensor):
-        for block in self.resblocks:
-            x = block(x)
-        return x
-class CrossAttentionDecoder(nn.Module):
-    def __init__(
-        self,
-        *,
-        num_latents: int,
-        out_channels: int,
-        fourier_embedder: FourierEmbedder,
-        width: int,
-        heads: int,
-        mlp_expand_ratio: int = 4,
-        downsample_ratio: int = 1,
-        enable_ln_post: bool = True,
-        qkv_bias: bool = True,
-        qk_norm: bool = False,
-        label_type: str = "binary"
-    ):
-        super().__init__()
-        self.enable_ln_post = enable_ln_post
-        self.fourier_embedder = fourier_embedder
-        self.downsample_ratio = downsample_ratio
-        self.query_proj = nn.Linear(self.fourier_embedder.out_dim, width)
-        if self.downsample_ratio != 1:
-            self.latents_proj = nn.Linear(width * downsample_ratio, width)
-        if self.enable_ln_post == False:
-            qk_norm = False
-        self.cross_attn_decoder = ResidualCrossAttentionBlock(
-            n_data=num_latents,
-            width=width,
-            mlp_expand_ratio=mlp_expand_ratio,
-            heads=heads,
-            qkv_bias=qkv_bias,
-            qk_norm=qk_norm
-        )
-        if self.enable_ln_post:
-            self.ln_post = nn.LayerNorm(width)
-        self.output_proj = nn.Linear(width, out_channels)
-        self.label_type = label_type
-        self.count = 0
-    def set_cross_attention_processor(self, processor):
-        self.cross_attn_decoder.attn.attention.attn_processor = processor
-    def set_default_cross_attention_processor(self):
-        self.cross_attn_decoder.attn.attention.attn_processor = CrossAttentionProcessor
-    def forward(self, queries=None, query_embeddings=None, latents=None):
-        if query_embeddings is None:
-            query_embeddings = self.query_proj(self.fourier_embedder(queries).to(latents.dtype))
-        self.count += query_embeddings.shape[1]
-        if self.downsample_ratio != 1:
-            latents = self.latents_proj(latents)
-        x = self.cross_attn_decoder(query_embeddings, latents)
-        if self.enable_ln_post:
-            x = self.ln_post(x)
-        occ = self.output_proj(x)
-        return occ
-def fps(
-    src: torch.Tensor,
-    batch: Optional[Tensor] = None,
-    ratio: Optional[Union[Tensor, float]] = None,
-    random_start: bool = True,
-    batch_size: Optional[int] = None,
-    ptr: Optional[Union[Tensor, List[int]]] = None,
-):
-    src = src.float()
-    from torch_cluster import fps as fps_fn
-    output = fps_fn(src, batch, ratio, random_start, batch_size, ptr)
-    return output
-class PointCrossAttentionEncoder(nn.Module):
-    def __init__(
-        self, *,
-        num_latents: int,
-        downsample_ratio: float,
-        pc_size: int,
-        pc_sharpedge_size: int,
-        fourier_embedder: FourierEmbedder,
-        point_feats: int,
-        width: int,
-        heads: int,
-        layers: int,
-        voxel_query_res: int,
-        normal_pe: bool = False,
-        qkv_bias: bool = True,
-        use_ln_post: bool = False,
-        use_checkpoint: bool = False,
-        qk_norm: bool = False,
-        jitter_query: bool = False,
-        voxel_query: bool = False,
-    ):
-        super().__init__()
-        self.use_checkpoint = use_checkpoint
-        self.num_latents = num_latents
-        self.downsample_ratio = downsample_ratio
-        self.point_feats = point_feats
-        self.normal_pe = normal_pe
-        self.jitter_query = jitter_query
-        self.voxel_query = voxel_query
-        self.voxel_query_res = voxel_query_res
-        if pc_sharpedge_size == 0:
-            print(
-                f'PointCrossAttentionEncoder INFO: pc_sharpedge_size is zero')
-        else:
-            print(
-                f'PointCrossAttentionEncoder INFO: pc_sharpedge_size is given, using pc_size={pc_size}, pc_sharpedge_size={pc_sharpedge_size}')
-        self.pc_size = pc_size
-        self.pc_sharpedge_size = pc_sharpedge_size
-        self.fourier_embedder = fourier_embedder
-        if self.jitter_query or self.voxel_query:
-            self.input_proj_q = nn.Linear(self.fourier_embedder.out_dim, width)
-            self.input_proj_kv = nn.Linear(self.fourier_embedder.out_dim + point_feats, width)
-        else:
-            self.input_proj = nn.Linear(self.fourier_embedder.out_dim + point_feats, width)
-        self.cross_attn = ResidualCrossAttentionBlock(
-            width=width,
-            heads=heads,
-            qkv_bias=qkv_bias,
-            qk_norm=qk_norm
-        )
-        self.self_attn = None
-        if layers > 0:
-            self.self_attn = Transformer(
-                n_ctx=num_latents,
-                width=width,
-                layers=layers,
-                heads=heads,
-                qkv_bias=qkv_bias,
-                qk_norm=qk_norm
-            )
-        if use_ln_post:
-            self.ln_post = nn.LayerNorm(width)
-        else:
-            self.ln_post = None
-    def sample_points_and_latents(self, pc: torch.FloatTensor, feats: Optional[torch.FloatTensor] = None):
-        B, N, D = pc.shape
-        num_pts = self.num_latents * self.downsample_ratio
-        # Compute number of latents
-        num_latents = int(num_pts / self.downsample_ratio)
-        # Compute the number of random and sharpedge latents
-        num_random_query = self.pc_size / (self.pc_size + self.pc_sharpedge_size) * num_latents
-        num_sharpedge_query = num_latents - num_random_query
-        # Split random and sharpedge surface points
-        random_pc, sharpedge_pc = torch.split(pc, [self.pc_size, self.pc_sharpedge_size], dim=1)
-        assert random_pc.shape[1] <= self.pc_size, "Random surface points size must be less than or equal to pc_size"
-        assert sharpedge_pc.shape[1] <= self.pc_sharpedge_size, "Sharpedge surface points size must be less than or equal to pc_sharpedge_size"
-        # Randomly select random surface points and random query points
-        input_random_pc_size = int(num_random_query * self.downsample_ratio)
-        random_query_ratio = num_random_query / input_random_pc_size
-        idx_random_pc = torch.randperm(random_pc.shape[1], device=random_pc.device)[:input_random_pc_size]
-        input_random_pc = random_pc[:, idx_random_pc, :]
-        if self.voxel_query:
-            query_random_pc, query_voxel_indices = voxelize_from_point(pc, num_latents, resolution=self.voxel_query_res)
-        else:
-            flatten_input_random_pc = input_random_pc.view(B * input_random_pc_size, D)
-            N_down = int(flatten_input_random_pc.shape[0] / B)
-            batch_down = torch.arange(B).to(pc.device)
-            batch_down = torch.repeat_interleave(batch_down, N_down)
-            idx_query_random = fps(flatten_input_random_pc, batch_down, ratio=random_query_ratio)
-            query_random_pc = flatten_input_random_pc[idx_query_random].view(B, -1, D)
-        # Randomly select sharpedge surface points and sharpedge query points
-        input_sharpedge_pc_size = int(num_sharpedge_query * self.downsample_ratio)
-        if input_sharpedge_pc_size == 0 or self.voxel_query:
-            input_sharpedge_pc = torch.zeros(B, 0, D, dtype=input_random_pc.dtype).to(pc.device)
-            query_sharpedge_pc = torch.zeros(B, 0, D, dtype=query_random_pc.dtype).to(pc.device)
-        else:
-            sharpedge_query_ratio = num_sharpedge_query / input_sharpedge_pc_size
-            idx_sharpedge_pc = torch.randperm(sharpedge_pc.shape[1], device=sharpedge_pc.device)[
-                            :input_sharpedge_pc_size]
-            input_sharpedge_pc = sharpedge_pc[:, idx_sharpedge_pc, :]
-            flatten_input_sharpedge_surface_points = input_sharpedge_pc.view(B * input_sharpedge_pc_size, D)
-            N_down = int(flatten_input_sharpedge_surface_points.shape[0] / B)
-            batch_down = torch.arange(B).to(pc.device)
-            batch_down = torch.repeat_interleave(batch_down, N_down)
-            idx_query_sharpedge = fps(flatten_input_sharpedge_surface_points, batch_down, ratio=sharpedge_query_ratio)
-            query_sharpedge_pc = flatten_input_sharpedge_surface_points[idx_query_sharpedge].view(B, -1, D)
-        # Concatenate random and sharpedge surface points and query points
-        query_pc = torch.cat([query_random_pc, query_sharpedge_pc], dim=1)
-        input_pc = torch.cat([input_random_pc, input_sharpedge_pc], dim=1)
-        if self.jitter_query:
-            R = self.voxel_query_res // 2
-            noise = torch.rand_like(query_pc)
-            query_pc += (noise - 0.5) / R
-        # PE
-        query = self.fourier_embedder(query_pc)
-        data = self.fourier_embedder(input_pc)
-        # Concat normal if given
-        if self.point_feats != 0:
-            random_surface_feats, sharpedge_surface_feats = torch.split(feats, [self.pc_size, self.pc_sharpedge_size],
-                                                                        dim=1)
-            input_random_surface_feats = random_surface_feats[:, idx_random_pc, :]
-            if not self.voxel_query and not self.jitter_query:
-                flatten_input_random_surface_feats = input_random_surface_feats.view(B * input_random_pc_size, -1)
-                query_random_feats = flatten_input_random_surface_feats[idx_query_random].view(B, -1,
-                                                                                           flatten_input_random_surface_feats.shape[
-                                                                                               -1])
-            if input_sharpedge_pc_size == 0:
-                input_sharpedge_surface_feats = torch.zeros(B, 0, self.point_feats,
-                                                            dtype=input_random_surface_feats.dtype).to(pc.device)
-                if not self.voxel_query and not self.jitter_query:
-                    query_sharpedge_feats = torch.zeros(B, 0, self.point_feats, dtype=query_random_feats.dtype).to(
-                        pc.device)
-            else:
-                input_sharpedge_surface_feats = sharpedge_surface_feats[:, idx_sharpedge_pc, :]
-                if not self.voxel_query and not self.jitter_query:
-                    flatten_input_sharpedge_surface_feats = input_sharpedge_surface_feats.view(B * input_sharpedge_pc_size,
-                                                                                            -1)
-                    query_sharpedge_feats = flatten_input_sharpedge_surface_feats[idx_query_sharpedge].view(B, -1,
-                                                                                                        flatten_input_sharpedge_surface_feats.shape[
-                                                                                                            -1])
-            if not self.voxel_query and not self.jitter_query:
-                query_feats = torch.cat([query_random_feats, query_sharpedge_feats], dim=1)
-            input_feats = torch.cat([input_random_surface_feats, input_sharpedge_surface_feats], dim=1)
-            if self.normal_pe:
-                if not self.voxel_query and not self.jitter_query:
-                    query_normal_pe = self.fourier_embedder(query_feats[..., :3])
-                    query_feats = torch.cat([query_normal_pe, query_feats[..., 3:]], dim=-1)
-                input_normal_pe = self.fourier_embedder(input_feats[..., :3])
-                input_feats = torch.cat([input_normal_pe, input_feats[..., 3:]], dim=-1)
-            if not self.voxel_query and not self.jitter_query:
-                query = torch.cat([query, query_feats], dim=-1)
-            data = torch.cat([data, input_feats], dim=-1)
-        if input_sharpedge_pc_size == 0:
-            query_sharpedge_pc = torch.zeros(B, 1, D).to(pc.device)
-            input_sharpedge_pc = torch.zeros(B, 1, D).to(pc.device)
-        if self.voxel_query:
-            pc_infos = [query_voxel_indices, query_random_pc]
-        else:
-            pc_infos = [query_pc, input_pc, query_random_pc, input_random_pc, query_sharpedge_pc, input_sharpedge_pc]
-        return query.view(B, -1, query.shape[-1]), data.view(B, -1, data.shape[-1]), pc_infos
-    def forward(self, pc, feats):
-        """
-        Args:
-            pc (torch.FloatTensor): [B, N, 3]
-            feats (torch.FloatTensor or None): [B, N, C]
-        Returns:
-        """
-        query, data, pc_infos = self.sample_points_and_latents(pc, feats)
-        if self.jitter_query or self.voxel_query:
-            query = self.input_proj_q(query)
-            query = query
-            data = self.input_proj_kv(data)
-            data = data
-        else:
-            query = self.input_proj(query)
-            query = query
-            data = self.input_proj(data)
-            data = data
-        latents = self.cross_attn(query, data)
-        if self.self_attn is not None:
-            latents = self.self_attn(latents)
-        if self.ln_post is not None:
-            latents = self.ln_post(latents)
-        return latents, pc_infos

ultrashape/models/autoencoders/attention_processors.py DELETED Viewed

@@ -1,103 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import os
-import torch
-import torch.nn.functional as F
-scaled_dot_product_attention = F.scaled_dot_product_attention
-if os.environ.get('CA_USE_SAGEATTN', '0') == '1':
-    try:
-        from sageattention import sageattn
-    except ImportError:
-        raise ImportError('Please install the package "sageattention" to use this USE_SAGEATTN.')
-    scaled_dot_product_attention = sageattn
-class CrossAttentionProcessor:
-    def __call__(self, attn, q, k, v):
-        out = scaled_dot_product_attention(q, k, v)
-        return out
-class FlashVDMCrossAttentionProcessor:
-    def __init__(self, topk=None):
-        self.topk = topk
-    def __call__(self, attn, q, k, v):
-        if k.shape[-2] == 3072:
-            topk = 1024
-        elif k.shape[-2] == 512:
-            topk = 256
-        else:
-            topk = k.shape[-2] // 3
-        if self.topk is True:
-            q1 = q[:, :, ::100, :]
-            sim = q1 @ k.transpose(-1, -2)
-            sim = torch.mean(sim, -2)
-            topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1)
-            topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1])
-            v0 = torch.gather(v, dim=-2, index=topk_ind)
-            k0 = torch.gather(k, dim=-2, index=topk_ind)
-            out = scaled_dot_product_attention(q, k0, v0)
-        elif self.topk is False:
-            out = scaled_dot_product_attention(q, k, v)
-        else:
-            idx, counts = self.topk
-            start = 0
-            outs = []
-            for grid_coord, count in zip(idx, counts):
-                end = start + count
-                q_chunk = q[:, :, start:end, :]
-                k0, v0 = self.select_topkv(q_chunk, k, v, topk)
-                out = scaled_dot_product_attention(q_chunk, k0, v0)
-                outs.append(out)
-                start += count
-            out = torch.cat(outs, dim=-2)
-        self.topk = False
-        return out
-    def select_topkv(self, q_chunk, k, v, topk):
-        q1 = q_chunk[:, :, ::50, :]
-        sim = q1 @ k.transpose(-1, -2)
-        sim = torch.mean(sim, -2)
-        topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1)
-        topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1])
-        v0 = torch.gather(v, dim=-2, index=topk_ind)
-        k0 = torch.gather(k, dim=-2, index=topk_ind)
-        return k0, v0
-class FlashVDMTopMCrossAttentionProcessor(FlashVDMCrossAttentionProcessor):
-    def select_topkv(self, q_chunk, k, v, topk):
-        q1 = q_chunk[:, :, ::30, :]
-        sim = q1 @ k.transpose(-1, -2)
-        # sim = sim.to(torch.float32)
-        sim = sim.softmax(-1)
-        sim = torch.mean(sim, 1)
-        activated_token = torch.where(sim > 1e-6)[2]
-        index = torch.unique(activated_token, return_counts=True)[0].unsqueeze(0).unsqueeze(0).unsqueeze(-1)
-        index = index.expand(-1, v.shape[1], -1, v.shape[-1])
-        v0 = torch.gather(v, dim=-2, index=index)
-        k0 = torch.gather(k, dim=-2, index=index)
-        return k0, v0

ultrashape/models/autoencoders/model.py DELETED Viewed

@@ -1,377 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-# Copyright (C) 2024 THL A29 Limited, a Tencent company.  All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import os
-from typing import Union, List
-import numpy as np
-import torch
-import torch.nn as nn
-import yaml
-from .attention_blocks import FourierEmbedder, Transformer, CrossAttentionDecoder, PointCrossAttentionEncoder
-from .surface_extractors import MCSurfaceExtractor, SurfaceExtractors
-from .volume_decoders import VanillaVolumeDecoder, FlashVDMVolumeDecoding, HierarchicalVolumeDecoding
-from ...utils import logger, synchronize_timer, smart_load_model
-class DiagonalGaussianDistribution(object):
-    def __init__(self, parameters: Union[torch.Tensor, List[torch.Tensor]], deterministic=False, feat_dim=1):
-        """
-        Initialize a diagonal Gaussian distribution with mean and log-variance parameters.
-        Args:
-            parameters (Union[torch.Tensor, List[torch.Tensor]]):
-                Either a single tensor containing concatenated mean and log-variance along `feat_dim`,
-                or a list of two tensors [mean, logvar].
-            deterministic (bool, optional): If True, the distribution is deterministic (zero variance).
-                Default is False. feat_dim (int, optional): Dimension along which mean and logvar are
-                concatenated if parameters is a single tensor. Default is 1.
-        """
-        self.feat_dim = feat_dim
-        self.parameters = parameters
-        if isinstance(parameters, list):
-            self.mean = parameters[0]
-            self.logvar = parameters[1]
-        else:
-            self.mean, self.logvar = torch.chunk(parameters, 2, dim=feat_dim)
-        self.logvar = torch.clamp(self.logvar, -30.0, 20.0)
-        self.deterministic = deterministic
-        self.std = torch.exp(0.5 * self.logvar)
-        self.var = torch.exp(self.logvar)
-        if self.deterministic:
-            self.var = self.std = torch.zeros_like(self.mean)
-    def sample(self):
-        """
-        Sample from the diagonal Gaussian distribution.
-        Returns:
-            torch.Tensor: A sample tensor with the same shape as the mean.
-        """
-        x = self.mean + self.std * torch.randn_like(self.mean)
-        return x
-    def kl(self, other=None, dims=(1, 2)):
-        """
-        Compute the Kullback-Leibler (KL) divergence between this distribution and another.
-        If `other` is None, compute KL divergence to a standard normal distribution N(0, I).
-        Args:
-            other (DiagonalGaussianDistribution, optional): Another diagonal Gaussian distribution.
-            dims (tuple, optional): Dimensions along which to compute the mean KL divergence.
-                Default is (1, 2, 3).
-        Returns:
-            torch.Tensor: The mean KL divergence value.
-        """
-        if self.deterministic:
-            return torch.Tensor([0.])
-        else:
-            if other is None:
-                return 0.5 * torch.mean(torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, dim=dims)
-            else:
-                return 0.5 * torch.mean(
-                    torch.pow(self.mean - other.mean, 2) / other.var
-                    + self.var / other.var - 1.0 - self.logvar + other.logvar,
-                    dim=dims)
-    def nll(self, sample, dims=(1, 2, 3)):
-        if self.deterministic:
-            return torch.Tensor([0.])
-        logtwopi = np.log(2.0 * np.pi)
-        return 0.5 * torch.sum(
-            logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var,
-            dim=dims)
-    def mode(self):
-        return self.mean
-class VectsetVAE(nn.Module):
-    @classmethod
-    @synchronize_timer('VectsetVAE Model Loading')
-    def from_single_file(
-        cls,
-        ckpt_path,
-        config_path=None,
-        params=None,
-        device='cuda',
-        dtype=torch.float16,
-        use_safetensors=None,
-        **kwargs,
-    ):
-        # load config
-        with open(config_path, 'r') as f:
-            config = yaml.safe_load(f)
-        # load ckpt
-        if use_safetensors:
-            ckpt_path = ckpt_path.replace('.ckpt', '.safetensors')
-        if not os.path.exists(ckpt_path):
-            raise FileNotFoundError(f"Model file {ckpt_path} not found")
-        logger.info(f"Loading model from {ckpt_path}")
-        if use_safetensors:
-            import safetensors.torch
-            ckpt = safetensors.torch.load_file(ckpt_path, device='cpu')
-        else:
-            ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True)
-        if params is not None:
-            model_kwargs = params
-        else:
-            model_kwargs = config['params']
-        model_kwargs.update(kwargs)
-        model = cls(**model_kwargs)
-        model.load_state_dict(ckpt)
-        model.to(device=device, dtype=dtype)
-        return model
-    @classmethod
-    def from_pretrained(
-        cls,
-        model_path,
-        device='cuda',
-        params=None,
-        dtype=torch.float16,
-        use_safetensors=False,
-        variant='fp16',
-        subfolder='hunyuan3d-vae-v2-1',
-        **kwargs,
-    ):
-        config_path, ckpt_path = smart_load_model(
-            model_path,
-            subfolder=subfolder,
-            use_safetensors=use_safetensors,
-            variant=variant
-        )
-        return cls.from_single_file(
-            ckpt_path,
-            config_path=config_path,
-            params=params,
-            device=device,
-            dtype=dtype,
-            use_safetensors=use_safetensors,
-            **kwargs
-        )
-    def init_from_ckpt(self, path, ignore_keys=()):
-        state_dict = torch.load(path, map_location="cpu")
-        state_dict = state_dict.get("state_dict", state_dict)
-        keys = list(state_dict.keys())
-        for k in keys:
-            for ik in ignore_keys:
-                if k.startswith(ik):
-                    print("Deleting key {} from state_dict.".format(k))
-                    del state_dict[k]
-        missing, unexpected = self.load_state_dict(state_dict, strict=False)
-        print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
-        if len(missing) > 0:
-            print(f"Missing Keys: {missing}")
-            print(f"Unexpected Keys: {unexpected}")
-    def __init__(
-        self,
-        volume_decoder=None,
-        surface_extractor=None
-    ):
-        super().__init__()
-        if volume_decoder is None:
-            volume_decoder = VanillaVolumeDecoder()
-        if surface_extractor is None:
-            surface_extractor = MCSurfaceExtractor()
-        self.volume_decoder = volume_decoder
-        self.surface_extractor = surface_extractor
-    def latents2mesh(self, latents: torch.FloatTensor, **kwargs):
-        with synchronize_timer('Volume decoding'):
-            grid_logits = self.volume_decoder(latents, self.geo_decoder, **kwargs)
-        with synchronize_timer('Surface extraction'):
-            outputs = self.surface_extractor(grid_logits, **kwargs)
-        return outputs, grid_logits
-    def enable_flashvdm_decoder(
-        self,
-        enabled: bool = True,
-        adaptive_kv_selection=True,
-        topk_mode='mean',
-        mc_algo='mc',
-    ):
-        if enabled:
-            if adaptive_kv_selection:
-                self.volume_decoder = FlashVDMVolumeDecoding(topk_mode)
-            else:
-                self.volume_decoder = HierarchicalVolumeDecoding()
-            if mc_algo not in SurfaceExtractors.keys():
-                raise ValueError(f'Unsupported mc_algo {mc_algo}, available:{list(SurfaceExtractors.keys())}')
-            self.surface_extractor = SurfaceExtractors[mc_algo]()
-        else:
-            self.volume_decoder = VanillaVolumeDecoder()
-            self.surface_extractor = MCSurfaceExtractor()
-class ShapeVAE(VectsetVAE):
-    def __init__(
-        self,
-        *,
-        num_latents: int,
-        embed_dim: int,
-        width: int,
-        heads: int,
-        num_decoder_layers: int,
-        num_encoder_layers: int = 8,
-        pc_size: int = 5120,
-        pc_sharpedge_size: int = 5120,
-        point_feats: int = 3,
-        downsample_ratio: int = 20,
-        geo_decoder_downsample_ratio: int = 1,
-        geo_decoder_mlp_expand_ratio: int = 4,
-        geo_decoder_ln_post: bool = True,
-        num_freqs: int = 8,
-        include_pi: bool = True,
-        qkv_bias: bool = True,
-        qk_norm: bool = False,
-        label_type: str = "binary",
-        drop_path_rate: float = 0.0,
-        scale_factor: float = 1.0,
-        use_ln_post: bool = True,
-        enable_flashvdm: bool = False,
-        ckpt_path = None,
-        jitter_query: bool = False,
-        voxel_query: bool = False,
-        voxel_query_res: int = 128,
-    ):
-        super().__init__()
-        self.geo_decoder_ln_post = geo_decoder_ln_post
-        self.downsample_ratio = downsample_ratio
-        self.fourier_embedder = FourierEmbedder(num_freqs=num_freqs, include_pi=include_pi)
-        self.encoder = PointCrossAttentionEncoder(
-            fourier_embedder=self.fourier_embedder,
-            num_latents=num_latents,
-            downsample_ratio=self.downsample_ratio,
-            pc_size=pc_size,
-            pc_sharpedge_size=pc_sharpedge_size,
-            point_feats=point_feats,
-            width=width,
-            heads=heads,
-            layers=num_encoder_layers,
-            qkv_bias=qkv_bias,
-            use_ln_post=use_ln_post,
-            qk_norm=qk_norm,
-            jitter_query=jitter_query,
-            voxel_query=voxel_query,
-            voxel_query_res=voxel_query_res
-        )
-        self.pre_kl = nn.Linear(width, embed_dim * 2)
-        self.post_kl = nn.Linear(embed_dim, width)
-        self.transformer = Transformer(
-            n_ctx=num_latents,
-            width=width,
-            layers=num_decoder_layers,
-            heads=heads,
-            qkv_bias=qkv_bias,
-            qk_norm=qk_norm,
-            drop_path_rate=drop_path_rate
-        )
-        self.geo_decoder = CrossAttentionDecoder(
-            fourier_embedder=self.fourier_embedder,
-            out_channels=1,
-            num_latents=num_latents,
-            mlp_expand_ratio=geo_decoder_mlp_expand_ratio,
-            downsample_ratio=geo_decoder_downsample_ratio,
-            enable_ln_post=self.geo_decoder_ln_post,
-            width=width // geo_decoder_downsample_ratio,
-            heads=heads // geo_decoder_downsample_ratio,
-            qkv_bias=qkv_bias,
-            qk_norm=qk_norm,
-            label_type=label_type,
-        )
-        self.scale_factor = scale_factor
-        self.latent_shape = (num_latents, embed_dim)
-        if ckpt_path is not None:
-            self.init_from_ckpt(ckpt_path)
-        if enable_flashvdm:
-            self.enable_flashvdm_decoder()
-    def forward(self, latents):
-        latents = self.post_kl(latents)
-        latents = self.transformer(latents)
-        return latents
-    def encode(self, surface, sample_posterior=True, need_kl=False, need_voxel=False):
-        pc, feats = surface[:, :, :3], surface[:, :, 3:]
-        latents, pc_infos = self.encoder(pc, feats)
-        # print(latents.shape, self.pre_kl.weight.shape)
-        moments = self.pre_kl(latents)
-        posterior = DiagonalGaussianDistribution(moments, feat_dim=-1)
-        if sample_posterior:
-            latents = posterior.sample()
-        else:
-            latents = posterior.mode()
-        if need_kl:
-            return latents, posterior
-        if need_voxel:
-            return latents, pc_infos[0]
-        return latents
-    def decode(self, latents, voxel_idx=None):
-        latents = self.post_kl(latents)
-        latents = self.transformer(latents)
-        return latents
-    def query(self, latents, queries, voxel_idx=None):
-        """
-        Args:
-            queries (torch.FloatTensor): [B, N, 3]
-            latents (torch.FloatTensor): [B, embed_dim]
-        Returns:
-            logits (torch.FloatTensor): [B, N], occupancy logits
-        """
-        logits = self.geo_decoder(queries=queries, latents=latents).squeeze(-1)
-        return logits

ultrashape/models/autoencoders/surface_extractors.py DELETED Viewed

@@ -1,266 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-from typing import Union, Tuple, List
-import numpy as np
-import torch
-from skimage import measure
-import cubvh
-class Latent2MeshOutput:
-    def __init__(self, mesh_v=None, mesh_f=None):
-        self.mesh_v = mesh_v
-        self.mesh_f = mesh_f
-def center_vertices(vertices):
-    """Translate the vertices so that bounding box is centered at zero."""
-    vert_min = vertices.min(dim=0)[0]
-    vert_max = vertices.max(dim=0)[0]
-    vert_center = 0.5 * (vert_min + vert_max)
-    return vertices - vert_center
-class SurfaceExtractor:
-    def _compute_box_stat(self, bounds: Union[Tuple[float], List[float], float], octree_resolution: int):
-        """
-        Compute grid size, bounding box minimum coordinates, and bounding box size based on input
-        bounds and resolution.
-        Args:
-            bounds (Union[Tuple[float], List[float], float]): Bounding box coordinates or a single
-            float representing half side length.
-                If float, bounds are assumed symmetric around zero in all axes.
-                Expected format if list/tuple: [xmin, ymin, zmin, xmax, ymax, zmax].
-            octree_resolution (int): Resolution of the octree grid.
-        Returns:
-            grid_size (List[int]): Grid size along each axis (x, y, z), each equal to octree_resolution + 1.
-            bbox_min (np.ndarray): Minimum coordinates of the bounding box (xmin, ymin, zmin).
-            bbox_size (np.ndarray): Size of the bounding box along each axis (xmax - xmin, etc.).
-        """
-        if isinstance(bounds, float):
-            bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds]
-        bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6])
-        bbox_size = bbox_max - bbox_min
-        grid_size = [int(octree_resolution) + 1, int(octree_resolution) + 1, int(octree_resolution) + 1]
-        return grid_size, bbox_min, bbox_size
-    def run(self, *args, **kwargs):
-        """
-        Abstract method to extract surface mesh from grid logits.
-        This method should be implemented by subclasses.
-        Raises:
-            NotImplementedError: Always, since this is an abstract method.
-        """
-        return NotImplementedError
-    def __call__(self, grid_logits, **kwargs):
-        """
-        Process a batch of grid logits to extract surface meshes.
-        Args:
-            grid_logits (torch.Tensor): Batch of grid logits with shape (batch_size, ...).
-            **kwargs: Additional keyword arguments passed to the `run` method.
-        Returns:
-            List[Optional[Latent2MeshOutput]]: List of mesh outputs for each grid in the batch.
-                If extraction fails for a grid, None is appended at that position.
-        """
-        outputs = []
-        for i in range(grid_logits.shape[0]):
-            try:
-                vertices, faces = self.run(grid_logits[i], **kwargs)
-                vertices = vertices.astype(np.float32)
-                faces = np.ascontiguousarray(faces)
-                outputs.append(Latent2MeshOutput(mesh_v=vertices, mesh_f=faces))
-            except Exception:
-                import traceback
-                traceback.print_exc()
-                outputs.append(None)
-        return outputs
-def get_sparse_valid_voxels(grid_logit: torch.Tensor):
-    if not isinstance(grid_logit, torch.Tensor):
-        raise TypeError("Input must be a PyTorch tensor.")
-    if grid_logit.dim() != 3 or grid_logit.shape[0] != grid_logit.shape[1] or grid_logit.shape[0] != grid_logit.shape[2]:
-        raise ValueError("Input tensor must have shape (N, N, N)")
-    N = grid_logit.shape[0]
-    device = grid_logit.device
-    # Chunk processing to save memory
-    chunk_size = 128
-    all_sparse_coords = []
-    all_sparse_logits = []
-    # Process in chunks along x-axis
-    for start_x in range(0, N - 1, chunk_size):
-        end_x = min(start_x + chunk_size, N - 1)
-        # Determine slice range including +1 for neighbor checks
-        # slice_end needs to be end_x + 1 to include the neighbors for the last voxel in chunk
-        slice_end = end_x + 1
-        chunk = grid_logit[start_x:slice_end, :, :]
-        nan_mask = torch.isnan(chunk)
-        # Compute mask for this chunk (valid voxels are 0 to end_x - start_x)
-        # Note: chunk shape is [D_chunk, N, N].
-        # We want to check validity for [0..D_chunk-1, :-1, :-1]
-        sub_nan_mask = nan_mask
-        # Validity check requires looking at i and i+1
-        # Invalid if ANY corner is NaN
-        invalid_voxel_mask = (
-            sub_nan_mask[:-1, :-1, :-1] |
-            sub_nan_mask[1:, :-1, :-1]  |
-            sub_nan_mask[:-1, 1:, :-1]  |
-            sub_nan_mask[:-1, :-1, 1:]  |
-            sub_nan_mask[:-1, 1:, 1:]   |
-            sub_nan_mask[1:, :-1, 1:]   |
-            sub_nan_mask[1:, 1:, :-1]   |
-            sub_nan_mask[1:, 1:, 1:]
-        )
-        valid_voxel_mask = ~invalid_voxel_mask
-        # Get local coordinates
-        local_coords = valid_voxel_mask.nonzero(as_tuple=False)
-        if local_coords.shape[0] > 0:
-            lx, ly, lz = local_coords[:, 0], local_coords[:, 1], local_coords[:, 2]
-            # Extract logits using local indices on the chunk
-            # v0 is at lx, v1 is at lx+1, etc.
-            sparse_vertex_logits = torch.stack([
-                chunk[lx,     ly,     lz],     # v0
-                chunk[lx + 1, ly,     lz],     # v1
-                chunk[lx + 1, ly + 1, lz],     # v2
-                chunk[lx,     ly + 1, lz],     # v3
-                chunk[lx,     ly,     lz + 1], # v4
-                chunk[lx + 1, ly,     lz + 1], # v5
-                chunk[lx + 1, ly + 1, lz + 1], # v6
-                chunk[lx,     ly + 1, lz + 1]  # v7
-            ], dim=1)
-            # Convert local coords to global coords
-            # x coordinate needs offset added
-            global_coords = local_coords.clone()
-            global_coords[:, 0] += start_x
-            all_sparse_coords.append(global_coords)
-            all_sparse_logits.append(sparse_vertex_logits)
-        # Free memory
-        del chunk, nan_mask, invalid_voxel_mask, valid_voxel_mask, local_coords
-    if not all_sparse_coords:
-        return torch.empty((0, 3), dtype=torch.long, device=device), torch.empty((0, 8), dtype=grid_logit.dtype, device=device)
-    sparse_coords = torch.cat(all_sparse_coords, dim=0)
-    sparse_vertex_logits = torch.cat(all_sparse_logits, dim=0)
-    return sparse_coords, sparse_vertex_logits
-class MCSurfaceExtractor(SurfaceExtractor):
-    def run(self, grid_logit, *, mc_level, bounds, octree_resolution, **kwargs):
-        """
-        Extract surface mesh using the Marching Cubes algorithm.
-        Args:
-            grid_logit (torch.Tensor): 3D grid logits tensor representing the scalar field.
-            mc_level (float): The level (iso-value) at which to extract the surface.
-            bounds (Union[Tuple[float], List[float], float]): Bounding box coordinates or half side length.
-            octree_resolution (int): Resolution of the octree grid.
-            **kwargs: Additional keyword arguments (ignored).
-        Returns:
-            Tuple[np.ndarray, np.ndarray]: Tuple containing:
-                - vertices (np.ndarray): Extracted mesh vertices, scaled and translated to bounding
-                  box coordinates.
-                - faces (np.ndarray): Extracted mesh faces (triangles).
-        """
-        grid_logit = grid_logit.detach()
-        sparse_coords, sparse_logits = get_sparse_valid_voxels(grid_logit)
-        # Convert to float32 only for the sparse set
-        vertices, faces = cubvh.sparse_marching_cubes(sparse_coords, sparse_logits.float(), mc_level)
-        vertices, faces = vertices.cpu().numpy(), faces.cpu().numpy()
-        # vertices, faces, normals, _ = measure.marching_cubes(grid_logit,
-        #             mc_level, method="lewiner", mask=(~np.isnan(grid_logit)))
-        grid_size, bbox_min, bbox_size = self._compute_box_stat(bounds, octree_resolution)
-        vertices = vertices / grid_size * bbox_size + bbox_min
-        return vertices, faces
-class DMCSurfaceExtractor(SurfaceExtractor):
-    def run(self, grid_logit, *, octree_resolution, **kwargs):
-        """
-        Extract surface mesh using Differentiable Marching Cubes (DMC) algorithm.
-        Args:
-            grid_logit (torch.Tensor): 3D grid logits tensor representing the scalar field.
-            octree_resolution (int): Resolution of the octree grid.
-            **kwargs: Additional keyword arguments (ignored).
-        Returns:
-            Tuple[np.ndarray, np.ndarray]: Tuple containing:
-                - vertices (np.ndarray): Extracted mesh vertices, centered and converted to numpy.
-                - faces (np.ndarray): Extracted mesh faces (triangles), with reversed vertex order.
-        Raises:
-            ImportError: If the 'diso' package is not installed.
-        """
-        device = grid_logit.device
-        if not hasattr(self, 'dmc'):
-            try:
-                from diso import DiffDMC
-                self.dmc = DiffDMC(dtype=torch.float32).to(device)
-            except:
-                raise ImportError("Please install diso via `pip install diso`, or set mc_algo to 'mc'")
-        sdf = -grid_logit / octree_resolution
-        sdf = sdf.to(torch.float32).contiguous()
-        verts, faces = self.dmc(sdf, deform=None, return_quads=False, normalize=True)
-        verts = center_vertices(verts)
-        vertices = verts.detach().cpu().numpy()
-        faces = faces.detach().cpu().numpy()[:, ::-1]
-        return vertices, faces
-SurfaceExtractors = {
-    'mc': MCSurfaceExtractor,
-    'dmc': DMCSurfaceExtractor,
-}

ultrashape/models/autoencoders/vae_trainer.py DELETED Viewed

@@ -1,229 +0,0 @@
-import os
-from contextlib import contextmanager
-from typing import List, Tuple, Optional, Union
-import torch
-import torch.nn as nn
-from torch.optim import lr_scheduler
-import pytorch_lightning as pl
-from pytorch_lightning.utilities import rank_zero_info
-from pytorch_lightning.utilities import rank_zero_only
-import trimesh
-from ...utils.misc import instantiate_from_config, instantiate_non_trainable_model, instantiate_vae_model
-def export_to_trimesh(mesh_output):
-    if isinstance(mesh_output, list):
-        outputs = []
-        for mesh in mesh_output:
-            if mesh is None:
-                outputs.append(None)
-            else:
-                mesh.mesh_f = mesh.mesh_f[:, ::-1]
-                mesh_output = trimesh.Trimesh(mesh.mesh_v, mesh.mesh_f)
-                outputs.append(mesh_output)
-        return outputs
-    else:
-        mesh_output.mesh_f = mesh_output.mesh_f[:, ::-1]
-        mesh_output = trimesh.Trimesh(mesh_output.mesh_v, mesh_output.mesh_f)
-        return mesh_output
-class VAETrainer(pl.LightningModule):
-    def __init__(
-        self,
-        *,
-        vae_config,
-        optimizer_cfg,
-        loss_cfg,
-        save_dir,
-        mc_res,
-        ckpt_path: Optional[str] = None,
-        ignore_keys: Union[Tuple[str], List[str]] = (),
-        torch_compile: bool = False,
-    ):
-        super().__init__()
-        # ========= init optimizer config ========= #
-        self.optimizer_cfg = optimizer_cfg
-        self.loss_cfg = loss_cfg
-        self.ckpt_path = ckpt_path
-        self.vae_model = instantiate_vae_model(vae_config, requires_grad=True)
-        if ckpt_path is not None:
-            self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
-        self.mc_res = mc_res
-        self.save_root = save_dir
-        if not os.path.exists(save_dir):
-            os.makedirs(save_dir)
-        # ========= torch compile to accelerate ========= #
-        self.torch_compile = torch_compile
-        if self.torch_compile:
-            torch.nn.Module.compile(self.vae_model)
-            print(f'*' * 100)
-            print(f'Compile model for acceleration')
-            print(f'*' * 100)
-    def init_from_ckpt(self, path, ignore_keys=()):
-        ckpt = torch.load(path, map_location="cpu")
-        if 'state_dict' not in ckpt:
-            # deepspeed ckpt
-            state_dict = {}
-            for k in ckpt.keys():
-                new_k = k.replace('_forward_module.', '')
-                state_dict[new_k] = ckpt[k]
-        else:
-            state_dict = ckpt["state_dict"]
-        keys = list(state_dict.keys())
-        for k in keys:
-            for ik in ignore_keys:
-                if ik in k:
-                    print("Deleting key {} from state_dict.".format(k))
-                    del state_dict[k]
-        # # ==================== Weight Surgery Start ====================
-        # old_key_base = "vae_model.encoder.input_proj"
-        # old_weight_key = f"{old_key_base}.weight"
-        # old_bias_key = f"{old_key_base}.bias"
-        # if old_weight_key in state_dict:
-        #     print(f"[*] Detected legacy '{old_key_base}' in checkpoint. Performing weight surgery...")
-        #     src_weight = state_dict[old_weight_key]
-        #     src_bias = state_dict[old_bias_key]
-        #     encoder = self.vae_model.encoder
-        #     fourier_dim = encoder.fourier_embedder.out_dim
-        #     # --- A. input_proj_kv ---
-        #     # shape: [width, fourier_dim + point_feats]
-        #     encoder.input_proj_kv.weight.data.copy_(src_weight)
-        #     encoder.input_proj_kv.bias.data.copy_(src_bias)
-        #     print(f"    -> Loaded input_proj_kv from {old_key_base}")
-        #     # --- B. input_proj_q ---
-        #     # shape: [width, fourier_dim]
-        #     sliced_weight = src_weight[:, :fourier_dim]
-        #     encoder.input_proj_q.weight.data.copy_(sliced_weight)
-        #     encoder.input_proj_q.bias.data.copy_(src_bias)
-        #     print(f"    -> Loaded input_proj_q (sliced) from {old_key_base}")
-        #     del state_dict[old_weight_key]
-        #     if old_bias_key in state_dict:
-        #         del state_dict[old_bias_key]
-        # # ==================== Weight Surgery End ====================
-        missing, unexpected = self.load_state_dict(state_dict, strict=False)
-        print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
-        if len(missing) > 0:
-            print(f"Missing Keys: {missing}")
-            print(f"Unexpected Keys: {unexpected}")
-    def configure_optimizers(self) -> Tuple[List, List]:
-        lr = self.learning_rate
-        params_list = []
-        trainable_parameters = list(self.vae_model.parameters())
-        params_list.append({'params': trainable_parameters, 'lr': lr})
-        optimizer = instantiate_from_config(self.optimizer_cfg.optimizer, params=params_list, lr=lr)
-        if hasattr(self.optimizer_cfg, 'scheduler'):
-            scheduler_func = instantiate_from_config(
-                self.optimizer_cfg.scheduler,
-                max_decay_steps=self.trainer.max_steps,
-                lr_max=lr
-            )
-            scheduler = {
-                "scheduler": lr_scheduler.LambdaLR(optimizer, lr_lambda=scheduler_func.schedule),
-                "interval": "step",
-                "frequency": 1
-            }
-            schedulers = [scheduler]
-        else:
-            schedulers = []
-        optimizers = [optimizer]
-        return optimizers, schedulers
-    def on_train_epoch_start(self) -> None:
-        pl.seed_everything(self.trainer.global_rank)
-    def forward(self, batch):
-        sup_pc_s_list = [batch["sup_near_uniform"], batch["sup_near_sharp"], batch["sup_space"]]
-        rand_points = [sup_pc_s[:,:,:3] for sup_pc_s in sup_pc_s_list]
-        rand_points_val = [sup_pc_s[:,:,3:] for sup_pc_s in sup_pc_s_list]
-        rand_points = torch.cat(rand_points, dim=1)
-        target = torch.cat(rand_points_val, dim=1)[...,0]
-        target = -target
-        latents, posterior = self.vae_model.encode(
-            batch['surface'], sample_posterior=True, need_kl=True)
-        latents = self.vae_model.decode(latents)
-        logits = self.vae_model.query(latents, rand_points)
-        loss_kl = posterior.kl()
-        loss_kl = torch.sum(loss_kl) / loss_kl.shape[0]
-        criteria = torch.nn.MSELoss()
-        criteria2 = torch.nn.L1Loss()
-        loss_logits = criteria(logits, target).mean() + criteria2(logits, target).mean()
-        loss = self.loss_cfg.lambda_logits * loss_logits + self.loss_cfg.lambda_kl * loss_kl
-        loss_dict = {
-            "loss": loss,
-            "loss_logits": loss_logits,
-            "loss_kl": loss_kl
-        }
-        return loss_dict, latents
-    def training_step(self, batch, batch_idx, optimizer_idx=0):
-        loss, latents = self.forward(batch)
-        split = 'train'
-        loss_dict = {
-            f"{split}/total_loss": loss["loss"].detach(),
-            f"{split}/loss_logits": loss["loss_logits"].detach(),
-            f"{split}/loss_kl": loss["loss_kl"].detach(),
-            f"{split}/lr_abs": self.optimizers().param_groups[0]['lr'],
-        }
-        self.log_dict(loss_dict, prog_bar=True, logger=True, sync_dist=False, rank_zero_only=True)
-        return loss
-    def validation_step(self, batch, batch_idx, optimizer_idx=0):
-        loss, latents = self.forward(batch)
-        split = 'val'
-        loss_dict = {
-            f"{split}/total_loss": loss["loss"].detach(),
-            f"{split}/loss_logits": loss["loss_logits"].detach(),
-            f"{split}/loss_kl": loss["loss_kl"].detach(),
-            f"{split}/lr_abs": self.optimizers().param_groups[0]['lr'],
-        }
-        self.log_dict(loss_dict, prog_bar=True, logger=True, sync_dist=False, rank_zero_only=True)
-        if self.trainer.global_rank < 2:
-            with torch.no_grad():
-                save_dir = f"{self.save_root}/gs{self.global_step:010d}_rank{self.trainer.global_rank}"
-                if not os.path.exists(save_dir):
-                    os.makedirs(save_dir)
-                uids = batch.get('uid')
-                for i, latent in enumerate(latents[:5]):
-                    mesh, grid_logits = self.vae_model.latents2mesh(
-                            latent[None],
-                            output_type='trimesh',
-                            bounds=1.01,
-                            mc_level=0.0,
-                            num_chunks=20000,
-                            octree_resolution=self.mc_res,
-                            mc_algo='mc',
-                            enable_pbar=True
-                        )
-                    mesh = export_to_trimesh(mesh[0])
-                    save_path = f"{save_dir}/recon_{os.path.splitext(os.path.basename(uids[i]))[0]}_mc{self.mc_res}.obj"
-                    mesh.export(save_path)
-        return loss

ultrashape/models/autoencoders/volume_decoders.py DELETED Viewed

@@ -1,440 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-from typing import Union, Tuple, List, Callable
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from einops import repeat
-from tqdm import tqdm
-from .attention_blocks import CrossAttentionDecoder
-from .attention_processors import FlashVDMCrossAttentionProcessor, FlashVDMTopMCrossAttentionProcessor
-from ...utils import logger
-def extract_near_surface_volume_fn(input_tensor: torch.Tensor, alpha: float):
-    val = input_tensor + alpha
-    valid_mask = val > -9000
-    mask = torch.ones_like(val, dtype=torch.int32)
-    sign = torch.sign(val.to(torch.float32))
-    # Helper to compute neighbor for a single direction
-    def check_neighbor_sign(shift, axis):
-        if shift == 0:
-            return
-        pad_dims = [0, 0, 0, 0, 0, 0]
-        if axis == 0:
-            pad_idx = 0 if shift > 0 else 1
-            pad_dims[pad_idx] = abs(shift)
-        elif axis == 1:
-            pad_idx = 2 if shift > 0 else 3
-            pad_dims[pad_idx] = abs(shift)
-        elif axis == 2:
-            pad_idx = 4 if shift > 0 else 5
-            pad_dims[pad_idx] = abs(shift)
-        padded = F.pad(val.unsqueeze(0).unsqueeze(0), pad_dims[::-1], mode='replicate')
-        slice_dims = [slice(None)] * 3
-        if axis == 0:
-            if shift > 0: slice_dims[0] = slice(shift, None)
-            else: slice_dims[0] = slice(None, shift)
-        elif axis == 1:
-            if shift > 0: slice_dims[1] = slice(shift, None)
-            else: slice_dims[1] = slice(None, shift)
-        elif axis == 2:
-            if shift > 0: slice_dims[2] = slice(shift, None)
-            else: slice_dims[2] = slice(None, shift)
-        padded = padded.squeeze(0).squeeze(0)
-        neighbor = padded[slice_dims]
-        neighbor = torch.where(neighbor > -9000, neighbor, val)
-        # Check sign consistency
-        neighbor_sign = torch.sign(neighbor.to(torch.float32))
-        return (neighbor_sign == sign)
-    # Iteratively check neighbors and update mask
-    # directions: (shift, axis)
-    directions = [(1, 0), (-1, 0), (1, 1), (-1, 1), (1, 2), (-1, 2)]
-    for shift, axis in directions:
-        is_same = check_neighbor_sign(shift, axis)
-        mask = mask & is_same.to(torch.int32)
-    # Invert mask: we want 1 where ANY neighbor has different sign
-    mask = (~(mask.bool())).to(torch.int32)
-    return mask * valid_mask.to(torch.int32)
-def generate_dense_grid_points(
-    bbox_min: np.ndarray,
-    bbox_max: np.ndarray,
-    octree_resolution: int,
-    indexing: str = "ij",
-):
-    length = bbox_max - bbox_min
-    num_cells = octree_resolution
-    x = np.linspace(bbox_min[0], bbox_max[0], int(num_cells) + 1, dtype=np.float32)
-    y = np.linspace(bbox_min[1], bbox_max[1], int(num_cells) + 1, dtype=np.float32)
-    z = np.linspace(bbox_min[2], bbox_max[2], int(num_cells) + 1, dtype=np.float32)
-    [xs, ys, zs] = np.meshgrid(x, y, z, indexing=indexing)
-    xyz = np.stack((xs, ys, zs), axis=-1)
-    grid_size = [int(num_cells) + 1, int(num_cells) + 1, int(num_cells) + 1]
-    return xyz, grid_size, length
-class VanillaVolumeDecoder:
-    @torch.no_grad()
-    def __call__(
-        self,
-        latents: torch.FloatTensor,
-        geo_decoder: Callable,
-        bounds: Union[Tuple[float], List[float], float] = 1.01,
-        num_chunks: int = 10000,
-        octree_resolution: int = None,
-        enable_pbar: bool = True,
-        **kwargs,
-    ):
-        device = latents.device
-        dtype = latents.dtype
-        batch_size = latents.shape[0]
-        # 1. generate query points
-        if isinstance(bounds, float):
-            bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds]
-        bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6])
-        xyz_samples, grid_size, length = generate_dense_grid_points(
-            bbox_min=bbox_min,
-            bbox_max=bbox_max,
-            octree_resolution=octree_resolution,
-            indexing="ij"
-        )
-        xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype).contiguous().reshape(-1, 3)
-        # 2. latents to 3d volume
-        batch_logits = []
-        for start in tqdm(range(0, xyz_samples.shape[0], num_chunks), desc=f"Volume Decoding",
-                          disable=not enable_pbar):
-            chunk_queries = xyz_samples[start: start + num_chunks, :]
-            chunk_queries = repeat(chunk_queries, "p c -> b p c", b=batch_size)
-            logits = geo_decoder(queries=chunk_queries, latents=latents)
-            batch_logits.append(logits)
-        grid_logits = torch.cat(batch_logits, dim=1)
-        grid_logits = grid_logits.view((batch_size, *grid_size)).float()
-        return grid_logits
-class HierarchicalVolumeDecoding:
-    @torch.no_grad()
-    def __call__(
-        self,
-        latents: torch.FloatTensor,
-        geo_decoder: Callable,
-        bounds: Union[Tuple[float], List[float], float] = 1.01,
-        num_chunks: int = 10000,
-        mc_level: float = 0.0,
-        octree_resolution: int = None,
-        min_resolution: int = 63,
-        enable_pbar: bool = True,
-        **kwargs,
-    ):
-        device = latents.device
-        dtype = latents.dtype
-        resolutions = []
-        if octree_resolution < min_resolution:
-            resolutions.append(octree_resolution)
-        while octree_resolution >= min_resolution:
-            resolutions.append(octree_resolution)
-            octree_resolution = octree_resolution // 2
-        resolutions.reverse()
-        # 1. generate query points
-        if isinstance(bounds, float):
-            bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds]
-        bbox_min = np.array(bounds[0:3])
-        bbox_max = np.array(bounds[3:6])
-        bbox_size = bbox_max - bbox_min
-        xyz_samples, grid_size, length = generate_dense_grid_points(
-            bbox_min=bbox_min,
-            bbox_max=bbox_max,
-            octree_resolution=resolutions[0],
-            indexing="ij"
-        )
-        dilate = nn.Conv3d(1, 1, 3, padding=1, bias=False, device=device, dtype=dtype)
-        dilate.weight = torch.nn.Parameter(torch.ones(dilate.weight.shape, dtype=dtype, device=device))
-        grid_size = np.array(grid_size)
-        xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype).contiguous().reshape(-1, 3)
-        # 2. latents to 3d volume
-        batch_logits = []
-        batch_size = latents.shape[0]
-        for start in tqdm(range(0, xyz_samples.shape[0], num_chunks),
-                          desc=f"Hierarchical Volume Decoding [r{resolutions[0] + 1}]"):
-            queries = xyz_samples[start: start + num_chunks, :]
-            batch_queries = repeat(queries, "p c -> b p c", b=batch_size)
-            logits = geo_decoder(queries=batch_queries, latents=latents)
-            batch_logits.append(logits)
-        grid_logits = torch.cat(batch_logits, dim=1).view((batch_size, grid_size[0], grid_size[1], grid_size[2]))
-        for octree_depth_now in resolutions[1:]:
-            grid_size = np.array([octree_depth_now + 1] * 3)
-            resolution = bbox_size / octree_depth_now
-            next_index = torch.zeros(tuple(grid_size), dtype=dtype, device=device)
-            next_logits = torch.full(next_index.shape, -10000., dtype=dtype, device=device)
-            curr_points = extract_near_surface_volume_fn(grid_logits.squeeze(0), mc_level)
-            curr_points += grid_logits.squeeze(0).abs() < 0.95
-            if octree_depth_now == resolutions[-1]:
-                expand_num = 0
-            else:
-                expand_num = 1
-            for i in range(expand_num):
-                curr_points = dilate(curr_points.unsqueeze(0).to(dtype)).squeeze(0)
-            (cidx_x, cidx_y, cidx_z) = torch.where(curr_points > 0)
-            next_index[cidx_x * 2, cidx_y * 2, cidx_z * 2] = 1
-            for i in range(2 - expand_num):
-                next_index = dilate(next_index.unsqueeze(0)).squeeze(0)
-            nidx = torch.where(next_index > 0)
-            # Store shape before deleting
-            next_index_shape = next_index.shape
-            del next_index
-            torch.cuda.empty_cache()
-            next_points = torch.stack(nidx, dim=1)
-            next_points = (next_points * torch.tensor(resolution, dtype=next_points.dtype, device=device) +
-                           torch.tensor(bbox_min, dtype=next_points.dtype, device=device))
-            batch_logits = []
-            for start in tqdm(range(0, next_points.shape[0], num_chunks),
-                              desc=f"Hierarchical Volume Decoding [r{octree_depth_now + 1}]"):
-                queries = next_points[start: start + num_chunks, :]
-                batch_queries = repeat(queries, "p c -> b p c", b=batch_size)
-                logits = geo_decoder(queries=batch_queries.to(latents.dtype), latents=latents)
-                batch_logits.append(logits)
-            # Delayed allocation of next_logits
-            next_logits = torch.full(next_index_shape, -10000., dtype=dtype, device=device)
-            grid_logits = torch.cat(batch_logits, dim=1)
-            next_logits[nidx] = grid_logits[0, ..., 0]
-            grid_logits = next_logits.unsqueeze(0)
-        grid_logits[grid_logits == -10000.] = float('nan')
-        return grid_logits
-class FlashVDMVolumeDecoding:
-    def __init__(self, topk_mode='mean'):
-        if topk_mode not in ['mean', 'merge']:
-            raise ValueError(f'Unsupported topk_mode {topk_mode}, available: {["mean", "merge"]}')
-        if topk_mode == 'mean':
-            self.processor = FlashVDMCrossAttentionProcessor()
-        else:
-            self.processor = FlashVDMTopMCrossAttentionProcessor()
-    @torch.no_grad()
-    def __call__(
-        self,
-        latents: torch.FloatTensor,
-        geo_decoder: CrossAttentionDecoder,
-        bounds: Union[Tuple[float], List[float], float] = 1.01,
-        num_chunks: int = 10000,
-        mc_level: float = 0.0,
-        octree_resolution: int = None,
-        min_resolution: int = 63,
-        mini_grid_num: int = 4,
-        enable_pbar: bool = True,
-        **kwargs,
-    ):
-        processor = self.processor
-        geo_decoder.set_cross_attention_processor(processor)
-        device = latents.device
-        dtype = latents.dtype
-        resolutions = []
-        if octree_resolution < min_resolution:
-            resolutions.append(octree_resolution)
-        while octree_resolution >= min_resolution:
-            resolutions.append(octree_resolution)
-            octree_resolution = octree_resolution // 2
-        resolutions.reverse()
-        resolutions[0] = round(resolutions[0] / mini_grid_num) * mini_grid_num - 1
-        for i, resolution in enumerate(resolutions[1:]):
-            resolutions[i + 1] = resolutions[0] * 2 ** (i + 1)
-        logger.info(f"FlashVDMVolumeDecoding Resolution: {resolutions}")
-        # 1. generate query points
-        if isinstance(bounds, float):
-            bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds]
-        bbox_min = np.array(bounds[0:3])
-        bbox_max = np.array(bounds[3:6])
-        bbox_size = bbox_max - bbox_min
-        xyz_samples, grid_size, length = generate_dense_grid_points(
-            bbox_min=bbox_min,
-            bbox_max=bbox_max,
-            octree_resolution=resolutions[0],
-            indexing="ij"
-        )
-        dilate = nn.Conv3d(1, 1, 3, padding=1, bias=False, device=device, dtype=dtype)
-        dilate.weight = torch.nn.Parameter(torch.ones(dilate.weight.shape, dtype=dtype, device=device))
-        grid_size = np.array(grid_size)
-        # 2. latents to 3d volume
-        xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype)
-        batch_size = latents.shape[0]
-        mini_grid_size = xyz_samples.shape[0] // mini_grid_num
-        xyz_samples = xyz_samples.view(
-            mini_grid_num, mini_grid_size,
-            mini_grid_num, mini_grid_size,
-            mini_grid_num, mini_grid_size, 3
-        ).permute(
-            0, 2, 4, 1, 3, 5, 6
-        ).reshape(
-            -1, mini_grid_size * mini_grid_size * mini_grid_size, 3
-        )
-        batch_logits = []
-        num_batchs = max(num_chunks // xyz_samples.shape[1], 1)
-        for start in tqdm(range(0, xyz_samples.shape[0], num_batchs),
-                          desc=f"FlashVDM Volume Decoding", disable=not enable_pbar):
-            queries = xyz_samples[start: start + num_batchs, :]
-            batch = queries.shape[0]
-            batch_latents = repeat(latents.squeeze(0), "p c -> b p c", b=batch)
-            processor.topk = True
-            # Chunk queries along dim 1 if too large
-            if queries.shape[1] > num_chunks:
-                batch_logits_sub = []
-                for sub_start in range(0, queries.shape[1], num_chunks):
-                    sub_queries = queries[:, sub_start: sub_start + num_chunks, :]
-                    logits = geo_decoder(queries=sub_queries, latents=batch_latents)
-                    batch_logits_sub.append(logits)
-                logits = torch.cat(batch_logits_sub, dim=1)
-            else:
-                logits = geo_decoder(queries=queries, latents=batch_latents)
-            batch_logits.append(logits)
-        grid_logits = torch.cat(batch_logits, dim=0).reshape(
-            mini_grid_num, mini_grid_num, mini_grid_num,
-            mini_grid_size, mini_grid_size,
-            mini_grid_size
-        ).permute(0, 3, 1, 4, 2, 5).contiguous().view(
-            (batch_size, grid_size[0], grid_size[1], grid_size[2])
-        )
-        for octree_depth_now in resolutions[1:]:
-            grid_size = np.array([octree_depth_now + 1] * 3)
-            resolution = bbox_size / octree_depth_now
-            next_index = torch.zeros(tuple(grid_size), dtype=dtype, device=device)
-            curr_points = extract_near_surface_volume_fn(grid_logits.squeeze(0), mc_level)
-            curr_points += grid_logits.squeeze(0).abs() < 0.95
-            if octree_depth_now == resolutions[-1]:
-                expand_num = 0
-            else:
-                expand_num = 1
-            for i in range(expand_num):
-                curr_points = dilate(curr_points.unsqueeze(0).to(dtype)).squeeze(0)
-                curr_points = dilate(curr_points.unsqueeze(0).to(dtype)).squeeze(0)
-            (cidx_x, cidx_y, cidx_z) = torch.where(curr_points > 0)
-            next_index[cidx_x * 2, cidx_y * 2, cidx_z * 2] = 1
-            for i in range(2 - expand_num):
-                next_index = dilate(next_index.unsqueeze(0)).squeeze(0)
-            nidx = torch.where(next_index > 0)
-            # Store shape before deleting
-            next_index_shape = next_index.shape
-            del next_index
-            torch.cuda.empty_cache()
-            next_points = torch.stack(nidx, dim=1)
-            next_points = (next_points * torch.tensor(resolution, dtype=torch.float32, device=device) +
-                           torch.tensor(bbox_min, dtype=torch.float32, device=device))
-            query_grid_num = 6
-            min_val = next_points.min(axis=0).values
-            max_val = next_points.max(axis=0).values
-            vol_queries_index = (next_points - min_val) / (max_val - min_val) * (query_grid_num - 0.001)
-            index = torch.floor(vol_queries_index).long()
-            index = index[..., 0] * (query_grid_num ** 2) + index[..., 1] * query_grid_num + index[..., 2]
-            index = index.sort()
-            next_points = next_points[index.indices].unsqueeze(0).contiguous()
-            unique_values = torch.unique(index.values, return_counts=True)
-            grid_logits = torch.zeros((next_points.shape[1]), dtype=latents.dtype, device=latents.device)
-            input_grid = [[], []]
-            logits_grid_list = []
-            start_num = 0
-            sum_num = 0
-            for grid_index, count in zip(unique_values[0].cpu().tolist(), unique_values[1].cpu().tolist()):
-                remaining_count = count
-                while remaining_count > 0:
-                    space_left = num_chunks - sum_num
-                    # If buffer is full, flush it
-                    if space_left <= 0:
-                        processor.topk = input_grid
-                        logits_grid = geo_decoder(queries=next_points[:, start_num:start_num + sum_num], latents=latents)
-                        start_num = start_num + sum_num
-                        logits_grid_list.append(logits_grid)
-                        input_grid = [[], []]
-                        sum_num = 0
-                        space_left = num_chunks
-                    take = min(remaining_count, space_left)
-                    input_grid[0].append(grid_index)
-                    input_grid[1].append(take)
-                    sum_num += take
-                    remaining_count -= take
-            if sum_num > 0:
-                processor.topk = input_grid
-                logits_grid = geo_decoder(queries=next_points[:, start_num:start_num + sum_num], latents=latents)
-                logits_grid_list.append(logits_grid)
-            logits_grid = torch.cat(logits_grid_list, dim=1)
-            grid_logits[index.indices] = logits_grid.squeeze(0).squeeze(-1)
-            # Delayed allocation of next_logits
-            next_logits = torch.full(next_index_shape, -10000., dtype=dtype, device=device)
-            next_logits[nidx] = grid_logits
-            grid_logits = next_logits.unsqueeze(0)
-        grid_logits[grid_logits == -10000.] = float('nan')
-        return grid_logits

ultrashape/models/conditioner_mask.py DELETED Viewed

@@ -1,337 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-# Copyright (C) 2024 THL A29 Limited, a Tencent company.  All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import numpy as np
-import torch
-import torch.nn as nn
-from torchvision import transforms
-from transformers import (
-    CLIPVisionModelWithProjection,
-    CLIPVisionConfig,
-    Dinov2Model,
-    Dinov2Config,
-)
-from transformers import AutoImageProcessor, AutoModel
-def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
-    """
-    embed_dim: output dimension for each position
-    pos: a list of positions to be encoded: size (M,)
-    out: (M, D)
-    """
-    assert embed_dim % 2 == 0
-    omega = np.arange(embed_dim // 2, dtype=np.float64)
-    omega /= embed_dim / 2.
-    omega = 1. / 10000 ** omega  # (D/2,)
-    pos = pos.reshape(-1)  # (M,)
-    out = np.einsum('m,d->md', pos, omega)  # (M, D/2), outer product
-    emb_sin = np.sin(out)  # (M, D/2)
-    emb_cos = np.cos(out)  # (M, D/2)
-    return np.concatenate([emb_sin, emb_cos], axis=1)
-class ImageEncoder(nn.Module):
-    def __init__(
-        self,
-        version=None,
-        config=None,
-        use_cls_token=True,
-        image_size=224,
-        **kwargs,
-    ):
-        super().__init__()
-        if config is None:
-            self.model = AutoModel.from_pretrained(version)
-        else:
-            self.model = self.MODEL_CLASS(self.MODEL_CONFIG_CLASS.from_dict(config))
-        self.model.eval()
-        self.model.requires_grad_(False)
-        self.use_cls_token = use_cls_token
-        self.size = image_size // 14
-        self.num_patches = (image_size // 14) ** 2
-        if self.use_cls_token:
-            self.num_patches += 1
-        self.transform = transforms.Compose(
-            [
-                transforms.Resize(image_size, transforms.InterpolationMode.BILINEAR, antialias=True),
-                transforms.CenterCrop(image_size),
-                transforms.Normalize(
-                    mean=self.mean,
-                    std=self.std,
-                ),
-            ]
-        )
-        self.mask_transform = transforms.Compose(
-            [
-                transforms.Resize(image_size, interpolation=transforms.InterpolationMode.NEAREST),
-                transforms.CenterCrop(image_size),
-            ]
-        )
-    def forward(self, image, mask=None, value_range=(-1, 1), **kwargs):
-        if value_range is not None:
-            low, high = value_range
-            image = (image - low) / (high - low)
-        image = image.to(self.model.device, dtype=self.model.dtype)
-        inputs = self.transform(image)
-        outputs = self.model(inputs)
-        last_hidden_state = outputs.last_hidden_state
-        if not self.use_cls_token:
-            last_hidden_state = last_hidden_state[:, 1:, :]
-        if mask is not None:
-            pool = nn.MaxPool2d(kernel_size=(14, 14), stride=(14, 14))
-            mask = self.mask_transform(mask)
-            mask = mask.to(image.device, dtype=image.dtype)
-            downsampled_mask = pool(mask)
-            flattened_mask = downsampled_mask.view(downsampled_mask.shape[0], -1)
-            flattened_mask = flattened_mask.unsqueeze(-1)
-            if self.use_cls_token:
-                flattened_mask = torch.cat(
-                    [torch.ones(flattened_mask.shape[0], 1, 1, device=flattened_mask.device, dtype=flattened_mask.dtype),
-                     flattened_mask], dim=1)
-            valid_mask = (flattened_mask != -1).float()
-            masked_hidden_state = last_hidden_state * valid_mask
-            valid_mask_bool = valid_mask.squeeze(-1) > 0
-            valid_counts = valid_mask_bool.sum(dim=1)
-            max_valid_tokens = valid_counts.max().item()
-            batch_indices = torch.arange(valid_mask_bool.shape[0], device=valid_mask_bool.device)
-            batch_indices = batch_indices.unsqueeze(1).expand(-1, valid_mask_bool.shape[1])
-            flat_batch_indices = batch_indices[valid_mask_bool]
-            flat_token_indices = torch.arange(valid_mask_bool.shape[1], device=valid_mask_bool.device)
-            flat_token_indices = flat_token_indices.unsqueeze(0).expand(valid_mask_bool.shape[0], -1)
-            flat_token_indices = flat_token_indices[valid_mask_bool]
-            valid_tokens = masked_hidden_state[flat_batch_indices, flat_token_indices]
-            # Create output tensor with special padding value (-1) instead of zeros
-            final_output = torch.full(
-                (valid_mask_bool.shape[0], max_valid_tokens, last_hidden_state.shape[-1]),
-                -1.0,  # Use -1 as padding value to clearly distinguish from valid tokens
-                device=last_hidden_state.device, dtype=last_hidden_state.dtype
-            )
-            cum_counts = torch.cumsum(valid_counts, dim=0) - valid_counts
-            for i in range(valid_mask_bool.shape[0]):
-                if valid_counts[i] > 0:
-                    start_idx = cum_counts[i]
-                    end_idx = start_idx + valid_counts[i]
-                    final_output[i, :valid_counts[i]] = valid_tokens[start_idx:end_idx]
-            return final_output
-        return last_hidden_state
-    def unconditional_embedding(self, batch_size, **kwargs):
-        device = next(self.model.parameters()).device
-        dtype = next(self.model.parameters()).dtype
-        num_tokens = kwargs.get('num_tokens', self.num_patches)
-        zero = torch.zeros(
-            batch_size,
-            num_tokens,
-            self.model.config.hidden_size,
-            device=device,
-            dtype=dtype,
-        )
-        return zero
-class CLIPImageEncoder(ImageEncoder):
-    MODEL_CLASS = CLIPVisionModelWithProjection
-    MODEL_CONFIG_CLASS = CLIPVisionConfig
-    mean = [0.48145466, 0.4578275, 0.40821073]
-    std = [0.26862954, 0.26130258, 0.27577711]
-class DinoImageEncoder(ImageEncoder):
-    MODEL_CLASS = Dinov2Model
-    MODEL_CONFIG_CLASS = Dinov2Config
-    mean = [0.485, 0.456, 0.406]
-    std = [0.229, 0.224, 0.225]
-class DinoImageEncoderMV(DinoImageEncoder):
-    def __init__(
-        self,
-        version=None,
-        config=None,
-        use_cls_token=True,
-        image_size=224,
-        view_num=4,
-        **kwargs,
-    ):
-        super().__init__(version, config, use_cls_token, image_size, **kwargs)
-        self.view_num = view_num
-        self.num_patches = self.num_patches
-        pos = np.arange(self.view_num, dtype=np.float32)
-        view_embedding = torch.from_numpy(
-            get_1d_sincos_pos_embed_from_grid(self.model.config.hidden_size, pos)).float()
-        view_embedding = view_embedding.unsqueeze(1).repeat(1, self.num_patches, 1)
-        self.view_embed = view_embedding.unsqueeze(0)
-    def forward(self, image, mask=None, value_range=(-1, 1), view_idxs=None):
-        if value_range is not None:
-            low, high = value_range
-            image = (image - low) / (high - low)
-        image = image.to(self.model.device, dtype=self.model.dtype)
-        bs, num_views, c, h, w = image.shape
-        image = image.view(bs * num_views, c, h, w)
-        inputs = self.transform(image)
-        outputs = self.model(inputs)
-        last_hidden_state = outputs.last_hidden_state
-        last_hidden_state = last_hidden_state.view(
-            bs, num_views, last_hidden_state.shape[-2],
-            last_hidden_state.shape[-1]
-        )
-        view_embedding = self.view_embed.to(last_hidden_state.dtype).to(last_hidden_state.device)
-        if view_idxs is not None:
-            assert len(view_idxs) == bs
-            view_embeddings = []
-            for i in range(bs):
-                view_idx = view_idxs[i]
-                assert num_views == len(view_idx)
-                view_embeddings.append(self.view_embed[:, view_idx, ...])
-            view_embedding = torch.cat(view_embeddings, 0).to(last_hidden_state.dtype).to(last_hidden_state.device)
-        if num_views != self.view_num:
-            view_embedding = view_embedding[:, :num_views, ...]
-        last_hidden_state = last_hidden_state + view_embedding
-        last_hidden_state = last_hidden_state.view(bs, num_views * last_hidden_state.shape[-2],
-                                                   last_hidden_state.shape[-1])
-        return last_hidden_state
-    def unconditional_embedding(self, batch_size, view_idxs=None, **kwargs):
-        device = next(self.model.parameters()).device
-        dtype = next(self.model.parameters()).dtype
-        zero = torch.zeros(
-            batch_size,
-            self.num_patches * len(view_idxs[0]),
-            self.model.config.hidden_size,
-            device=device,
-            dtype=dtype,
-        )
-        return zero
-def build_image_encoder(config):
-    if config['type'] == 'CLIPImageEncoder':
-        return CLIPImageEncoder(**config['kwargs'])
-    elif config['type'] == 'DinoImageEncoder':
-        return DinoImageEncoder(**config['kwargs'])
-    elif config['type'] == 'DinoImageEncoderMV':
-        return DinoImageEncoderMV(**config['kwargs'])
-    else:
-        raise ValueError(f'Unknown image encoder type: {config["type"]}')
-class DualImageEncoder(nn.Module):
-    def __init__(
-        self,
-        main_image_encoder,
-        additional_image_encoder,
-    ):
-        super().__init__()
-        self.main_image_encoder = build_image_encoder(main_image_encoder)
-        self.additional_image_encoder = build_image_encoder(additional_image_encoder)
-    def forward(self, image, mask=None, **kwargs):
-        outputs = {
-            'main': self.main_image_encoder(image, mask=mask, **kwargs),
-            'additional': self.additional_image_encoder(image, mask=mask, **kwargs),
-        }
-        return outputs
-    def unconditional_embedding(self, batch_size, **kwargs):
-        outputs = {
-            'main': self.main_image_encoder.unconditional_embedding(batch_size, **kwargs),
-            'additional': self.additional_image_encoder.unconditional_embedding(batch_size, **kwargs),
-        }
-        return outputs
-class SingleImageEncoder(nn.Module):
-    def __init__(
-        self,
-        main_image_encoder,
-        drop_ratio=0.1,
-    ):
-        super().__init__()
-        self.main_image_encoder = build_image_encoder(main_image_encoder)
-        self.drop_ratio = drop_ratio
-        # self.disable_drop = disable_drop
-    def forward(self, image, disable_drop=True, mask=None, **kwargs):
-        outputs = {
-            'main': self.main_image_encoder(image, mask=mask, **kwargs),
-        }
-        if disable_drop:
-            return outputs
-        else:
-            random_p = torch.rand(len(image), device='cuda')
-            remain_bool_tensor = random_p > self.drop_ratio
-            outputs['main'] *= remain_bool_tensor.view(-1,1,1)
-        return outputs
-        outputs = {
-            'main': self.main_image_encoder(image, mask=mask, **kwargs),
-        }
-        return outputs
-    def unconditional_embedding(self, batch_size, **kwargs):
-        outputs = {
-            'main': self.main_image_encoder.unconditional_embedding(batch_size, **kwargs),
-        }
-        return outputs

ultrashape/models/denoisers/__init__.py DELETED Viewed

@@ -1,22 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-from .dit_mask import RefineDiT

ultrashape/models/denoisers/dit_mask.py DELETED Viewed

@@ -1,725 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-# Copyright (C) 2024 THL A29 Limited, a Tencent company.  All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import os
-import yaml
-import math
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from einops import rearrange
-from .moe_layers import MoEBlock
-from ...utils import logger, synchronize_timer, smart_load_model
-from flash_attn import flash_attn_varlen_func
-def modulate(x, shift, scale):
-    return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)
-class Timesteps(nn.Module):
-    def __init__(self,
-                 num_channels: int,
-                 downscale_freq_shift: float = 0.0,
-                 scale: int = 1,
-                 max_period: int = 10000
-                 ):
-        super().__init__()
-        self.num_channels = num_channels
-        self.downscale_freq_shift = downscale_freq_shift
-        self.scale = scale
-        self.max_period = max_period
-    def forward(self, timesteps):
-        assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array"
-        embedding_dim = self.num_channels
-        half_dim = embedding_dim // 2
-        exponent = -math.log(self.max_period) * torch.arange(
-            start=0, end=half_dim, dtype=torch.float32, device=timesteps.device)
-        exponent = exponent / (half_dim - self.downscale_freq_shift)
-        emb = torch.exp(exponent)
-        emb = timesteps[:, None].float() * emb[None, :]
-        emb = self.scale * emb
-        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1)
-        if embedding_dim % 2 == 1:
-            emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))
-        return emb
-class TimestepEmbedder(nn.Module):
-    """
-    Embeds scalar timesteps into vector representations.
-    """
-    def __init__(self, hidden_size, frequency_embedding_size=256, cond_proj_dim=None, out_size=None):
-        super().__init__()
-        if out_size is None:
-            out_size = hidden_size
-        self.mlp = nn.Sequential(
-            nn.Linear(hidden_size, frequency_embedding_size, bias=True),
-            nn.GELU(),
-            nn.Linear(frequency_embedding_size, out_size, bias=True),
-        )
-        self.frequency_embedding_size = frequency_embedding_size
-        if cond_proj_dim is not None:
-            self.cond_proj = nn.Linear(cond_proj_dim, frequency_embedding_size, bias=False)
-        self.time_embed = Timesteps(hidden_size)
-    def forward(self, t, condition):
-        t_freq = self.time_embed(t).type(self.mlp[0].weight.dtype)
-        # t_freq = timestep_embedding(t, self.frequency_embedding_size).type(self.mlp[0].weight.dtype)
-        if condition is not None:
-            t_freq = t_freq + self.cond_proj(condition)
-        t = self.mlp(t_freq)
-        t = t.unsqueeze(dim=1)
-        return t
-class MLP(nn.Module):
-    def __init__(self, *, width: int):
-        super().__init__()
-        self.width = width
-        self.fc1 = nn.Linear(width, width * 4)
-        self.fc2 = nn.Linear(width * 4, width)
-        self.gelu = nn.GELU()
-    def forward(self, x):
-        return self.fc2(self.gelu(self.fc1(x)))
-class CrossAttention(nn.Module):
-    def __init__(
-        self,
-        qdim,
-        kdim,
-        num_heads,
-        qkv_bias=True,
-        qk_norm=False,
-        norm_layer=nn.LayerNorm,
-        **kwargs,
-    ):
-        super().__init__()
-        self.qdim = qdim
-        self.kdim = kdim
-        self.num_heads = num_heads
-        assert self.qdim % num_heads == 0, "self.qdim must be divisible by num_heads"
-        self.head_dim = self.qdim // num_heads
-        assert self.head_dim % 8 == 0 and self.head_dim <= 128, "Only support head_dim <= 128 and divisible by 8"
-        self.scale = self.head_dim ** -0.5
-        self.to_q = nn.Linear(qdim, qdim, bias=qkv_bias)
-        self.to_k = nn.Linear(kdim, qdim, bias=qkv_bias)
-        self.to_v = nn.Linear(kdim, qdim, bias=qkv_bias)
-        # TODO: eps should be 1 / 65530 if using fp16
-        self.q_norm = norm_layer(self.head_dim, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
-        self.k_norm = norm_layer(self.head_dim, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
-        self.out_proj = nn.Linear(qdim, qdim, bias=True)
-    def forward(self, x, y):
-        """
-        Parameters
-        ----------
-        x: torch.Tensor
-            (batch, seqlen1, hidden_dim) (where hidden_dim = num heads * head dim)
-        y: torch.Tensor
-            (batch, seqlen2, hidden_dim2) - may contain padding (marked with -1)
-        freqs_cis_img: torch.Tensor
-            (batch, hidden_dim // 2), RoPE for image
-        """
-        b, s1, c = x.shape  # [b, s1, D]
-        # Detect padding tokens: check if all values in the feature dimension are -1
-        # y_mask: [b, s2], True for valid tokens, False for padding
-        y_mask = (y != -1).any(dim=-1)  # [b, s2]
-        has_padding = not y_mask.all()
-        _, s2, c = y.shape  # [b, s2, 1024]
-        q = self.to_q(x)
-        k = self.to_k(y)
-        v = self.to_v(y)
-        kv = torch.cat((k, v), dim=-1)
-        split_size = kv.shape[-1] // self.num_heads // 2
-        kv = kv.view(1, -1, self.num_heads, split_size * 2)
-        k, v = torch.split(kv, split_size, dim=-1)
-        q = q.view(b, s1, self.num_heads, self.head_dim)  # [b, s1, h, d]
-        k = k.view(b, s2, self.num_heads, self.head_dim)  # [b, s2, h, d]
-        v = v.view(b, s2, self.num_heads, self.head_dim)  # [b, s2, h, d]
-        q = self.q_norm(q)
-        k = self.k_norm(k)
-        if has_padding:
-            seqlens_k = y_mask.sum(dim=1).int()
-            q_flat = q.reshape(-1, self.num_heads, self.head_dim)
-            # For k, v: only keep valid tokens (remove padding)
-            # Create indices for valid tokens
-            valid_indices = []
-            cu_seqlens_k = [0]
-            for i in range(b):
-                valid_len = seqlens_k[i].item()
-                batch_indices = torch.arange(valid_len, device=y.device) + i * s2
-                valid_indices.append(batch_indices)
-                cu_seqlens_k.append(cu_seqlens_k[-1] + valid_len)
-            valid_indices = torch.cat(valid_indices)
-            k_flat = k.reshape(b * s2, self.num_heads, self.head_dim)[valid_indices]  # [total_k, h, d]
-            v_flat = v.reshape(b * s2, self.num_heads, self.head_dim)[valid_indices]  # [total_k, h, d]
-            # Create cumulative sequence lengths
-            cu_seqlens_q = torch.arange(0, (b + 1) * s1, s1, dtype=torch.int32, device=x.device)
-            cu_seqlens_k = torch.tensor(cu_seqlens_k, dtype=torch.int32, device=x.device)
-            # Call flash attention varlen
-            q_flat = q_flat.to(torch.bfloat16)
-            k_flat = k_flat.to(torch.bfloat16)
-            v_flat = v_flat.to(torch.bfloat16)
-            context = flash_attn_varlen_func(
-                q_flat, k_flat, v_flat,
-                cu_seqlens_q, cu_seqlens_k,
-                s1, seqlens_k.max().item(),
-                dropout_p=0.0,
-                softmax_scale=None,
-                causal=False
-            )
-            context = context.reshape(b, s1, -1)
-        else:
-            with torch.backends.cuda.sdp_kernel(
-                enable_flash=True,
-                enable_math=False,
-                enable_mem_efficient=True
-            ):
-                q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.num_heads), (q, k, v))
-                attn_mask = None
-                context = F.scaled_dot_product_attention(
-                    q, k, v, attn_mask=attn_mask
-                ).transpose(1, 2).reshape(b, s1, -1)
-        out = self.out_proj(context)
-        return out
-class Attention(nn.Module):
-    """
-    We rename some layer names to align with flash attention
-    """
-    def __init__(
-        self,
-        dim,
-        num_heads,
-        qkv_bias=True,
-        qk_norm=False,
-        norm_layer=nn.LayerNorm,
-    ):
-        super().__init__()
-        self.dim = dim
-        self.num_heads = num_heads
-        assert self.dim % num_heads == 0, 'dim should be divisible by num_heads'
-        self.head_dim = self.dim // num_heads
-        # This assertion is aligned with flash attention
-        assert self.head_dim % 8 == 0 and self.head_dim <= 128, "Only support head_dim <= 128 and divisible by 8"
-        self.scale = self.head_dim ** -0.5
-        self.to_q = nn.Linear(dim, dim, bias=qkv_bias)
-        self.to_k = nn.Linear(dim, dim, bias=qkv_bias)
-        self.to_v = nn.Linear(dim, dim, bias=qkv_bias)
-        self.q_norm = norm_layer(self.head_dim, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
-        self.k_norm = norm_layer(self.head_dim, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity()
-        self.out_proj = nn.Linear(dim, dim)
-    # def forward(self, x):
-    def forward(self, x, rotary_cos=None, rotary_sin=None):
-        B, N, C = x.shape
-        q = self.to_q(x)
-        k = self.to_k(x)
-        v = self.to_v(x)
-        qkv = torch.cat((q, k, v), dim=-1)
-        split_size = qkv.shape[-1] // self.num_heads // 3
-        qkv = qkv.view(1, -1, self.num_heads, split_size * 3)
-        q, k, v = torch.split(qkv, split_size, dim=-1)
-        q = q.reshape(B, N, self.num_heads, self.head_dim).transpose(1, 2)  # [b, h, s, d]
-        k = k.reshape(B, N, self.num_heads, self.head_dim).transpose(1, 2)  # [b, h, s, d]
-        v = v.reshape(B, N, self.num_heads, self.head_dim).transpose(1, 2)
-        q = self.q_norm(q)  # [b, h, s, d]
-        k = self.k_norm(k)  # [b, h, s, d]
-        # ========================= Apply RoPE =========================
-        if rotary_cos is not None:
-            q = apply_rotary_emb(q, rotary_cos, rotary_sin)
-            k = apply_rotary_emb(k, rotary_cos, rotary_sin)
-        # ==============================================================
-        with torch.backends.cuda.sdp_kernel(
-            enable_flash=True,
-            enable_math=False,
-            enable_mem_efficient=True
-        ):
-            x = F.scaled_dot_product_attention(q, k, v)
-            x = x.transpose(1, 2).reshape(B, N, -1)
-        x = self.out_proj(x)
-        return x
-class DiTBlock(nn.Module):
-    def __init__(
-        self,
-        hidden_size,
-        c_emb_size,
-        num_heads,
-        text_states_dim=1024,
-        use_flash_attn=False,
-        qk_norm=False,
-        norm_layer=nn.LayerNorm,
-        qk_norm_layer=nn.RMSNorm,
-        init_scale=1.0,
-        qkv_bias=True,
-        skip_connection=True,
-        timested_modulate=False,
-        use_moe: bool = False,
-        num_experts: int = 8,
-        moe_top_k: int = 2,
-        **kwargs,
-    ):
-        super().__init__()
-        self.use_flash_attn = use_flash_attn
-        use_ele_affine = True
-        # ========================= Self-Attention =========================
-        self.norm1 = norm_layer(hidden_size, elementwise_affine=use_ele_affine, eps=1e-6)
-        self.attn1 = Attention(hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, qk_norm=qk_norm,
-                               norm_layer=qk_norm_layer)
-        # ========================= FFN =========================
-        self.norm2 = norm_layer(hidden_size, elementwise_affine=use_ele_affine, eps=1e-6)
-        # ========================= Add =========================
-        # Simply use add like SDXL.
-        self.timested_modulate = timested_modulate
-        if self.timested_modulate:
-            self.default_modulation = nn.Sequential(
-                nn.SiLU(),
-                nn.Linear(c_emb_size, hidden_size, bias=True)
-            )
-        # ========================= Cross-Attention =========================
-        self.attn2 = CrossAttention(hidden_size, text_states_dim, num_heads=num_heads, qkv_bias=qkv_bias,
-                                    qk_norm=qk_norm, norm_layer=qk_norm_layer, init_scale=init_scale)
-        self.norm3 = norm_layer(hidden_size, elementwise_affine=True, eps=1e-6)
-        if skip_connection:
-            self.skip_norm = norm_layer(hidden_size, elementwise_affine=True, eps=1e-6)
-            self.skip_linear = nn.Linear(2 * hidden_size, hidden_size)
-        else:
-            self.skip_linear = None
-        self.use_moe = use_moe
-        if self.use_moe:
-            self.moe = MoEBlock(
-                hidden_size,
-                num_experts=num_experts,
-                moe_top_k=moe_top_k,
-                dropout=0.0,
-                activation_fn="gelu",
-                final_dropout=False,
-                ff_inner_dim=int(hidden_size * 4.0),
-                ff_bias=True,
-            )
-        else:
-            self.mlp = MLP(width=hidden_size)
-    def forward(self, x, c=None, text_states=None, skip_value=None, rotary_cos=None, rotary_sin=None):
-        if self.skip_linear is not None:
-            cat = torch.cat([skip_value, x], dim=-1)
-            x = self.skip_linear(cat)
-            x = self.skip_norm(x)
-        # Self-Attention
-        if self.timested_modulate:
-            shift_msa = self.default_modulation(c).unsqueeze(dim=1)
-            x = x + shift_msa
-        attn_out = self.attn1(self.norm1(x), rotary_cos=rotary_cos, rotary_sin=rotary_sin)
-        x = x + attn_out
-        # Cross-Attention
-        x = x + self.attn2(self.norm2(x), text_states)
-        # FFN Layer
-        mlp_inputs = self.norm3(x)
-        if self.use_moe:
-            x = x + self.moe(mlp_inputs)
-        else:
-            x = x + self.mlp(mlp_inputs)
-        return x
-class AttentionPool(nn.Module):
-    def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None):
-        super().__init__()
-        self.positional_embedding = nn.Parameter(torch.randn(spacial_dim + 1, embed_dim) / embed_dim ** 0.5)
-        self.k_proj = nn.Linear(embed_dim, embed_dim)
-        self.q_proj = nn.Linear(embed_dim, embed_dim)
-        self.v_proj = nn.Linear(embed_dim, embed_dim)
-        self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim)
-        self.num_heads = num_heads
-    def forward(self, x, attention_mask=None):
-        x = x.permute(1, 0, 2)  # NLC -> LNC
-        if attention_mask is not None:
-            attention_mask = attention_mask.unsqueeze(-1).permute(1, 0, 2)
-            global_emb = (x * attention_mask).sum(dim=0) / attention_mask.sum(dim=0)
-            x = torch.cat([global_emb[None,], x], dim=0)
-        else:
-            x = torch.cat([x.mean(dim=0, keepdim=True), x], dim=0)  # (L+1)NC
-        x = x + self.positional_embedding[:, None, :].to(x.dtype)  # (L+1)NC
-        x, _ = F.multi_head_attention_forward(
-            query=x[:1], key=x, value=x,
-            embed_dim_to_check=x.shape[-1],
-            num_heads=self.num_heads,
-            q_proj_weight=self.q_proj.weight,
-            k_proj_weight=self.k_proj.weight,
-            v_proj_weight=self.v_proj.weight,
-            in_proj_weight=None,
-            in_proj_bias=torch.cat([self.q_proj.bias, self.k_proj.bias, self.v_proj.bias]),
-            bias_k=None,
-            bias_v=None,
-            add_zero_attn=False,
-            dropout_p=0,
-            out_proj_weight=self.c_proj.weight,
-            out_proj_bias=self.c_proj.bias,
-            use_separate_proj_weight=True,
-            training=self.training,
-            need_weights=False
-        )
-        return x.squeeze(0)
-class FinalLayer(nn.Module):
-    """
-    The final layer of DiT.
-    """
-    def __init__(self, final_hidden_size, out_channels):
-        super().__init__()
-        self.final_hidden_size = final_hidden_size
-        self.norm_final = nn.LayerNorm(final_hidden_size, elementwise_affine=True, eps=1e-6)
-        self.linear = nn.Linear(final_hidden_size, out_channels, bias=True)
-    def forward(self, x):
-        x = self.norm_final(x)
-        x = x[:, 1:]
-        x = self.linear(x)
-        return x
-class RefineDiT(nn.Module):
-    @classmethod
-    @synchronize_timer('Refine Model Loading')
-    def from_single_file(
-        cls,
-        ckpt_path,
-        config_path,
-        device='cuda',
-        dtype=torch.float16,
-        use_safetensors=None,
-        **kwargs,
-    ):
-        # load config
-        with open(config_path, 'r') as f:
-            config = yaml.safe_load(f)
-        # load ckpt
-        if use_safetensors:
-            ckpt_path = ckpt_path.replace('.ckpt', '.safetensors')
-        if not os.path.exists(ckpt_path):
-            raise FileNotFoundError(f"Model file {ckpt_path} not found")
-        logger.info(f"Loading model from {ckpt_path}")
-        if use_safetensors:
-            import safetensors.torch
-            ckpt = safetensors.torch.load_file(ckpt_path, device='cpu')
-        else:
-            ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True)
-        if 'model' in ckpt:
-            ckpt = ckpt['model']
-        if 'model' in config:
-            config = config['model']
-        model_kwargs = config['params']
-        model_kwargs.update(kwargs)
-        model = cls(**model_kwargs)
-        model.load_state_dict(ckpt)
-        model.to(device=device, dtype=dtype)
-        return model
-    @classmethod
-    def from_pretrained(
-        cls,
-        model_path,
-        device='cuda',
-        dtype=torch.float16,
-        use_safetensors=False,
-        variant='fp16',
-        subfolder='hunyuan3d-dit-v2-1',
-        **kwargs,
-    ):
-        config_path, ckpt_path = smart_load_model(
-            model_path,
-            subfolder=subfolder,
-            use_safetensors=use_safetensors,
-            variant=variant
-        )
-        return cls.from_single_file(
-            ckpt_path,
-            config_path,
-            device=device,
-            dtype=dtype,
-            use_safetensors=use_safetensors,
-            **kwargs
-        )
-    def __init__(
-        self,
-        input_size=1024,
-        in_channels=4,
-        hidden_size=1024,
-        context_dim=1024,
-        depth=24,
-        num_heads=16,
-        mlp_ratio=4.0,
-        norm_type='layer',
-        qk_norm_type='rms',
-        qk_norm=False,
-        text_len=257,
-        guidance_cond_proj_dim=None,
-        qkv_bias=True,
-        num_moe_layers: int = 6,
-        num_experts: int = 8,
-        moe_top_k: int = 2,
-        voxel_query_res: int = 128,
-        **kwargs
-    ):
-        super().__init__()
-        self.input_size = input_size
-        self.depth = depth
-        self.in_channels = in_channels
-        self.out_channels = in_channels
-        self.num_heads = num_heads
-        self.hidden_size = hidden_size
-        self.norm = nn.LayerNorm if norm_type == 'layer' else nn.RMSNorm
-        self.qk_norm = nn.RMSNorm if qk_norm_type == 'rms' else nn.LayerNorm
-        self.context_dim = context_dim
-        self.voxel_query_res = voxel_query_res
-        self.guidance_cond_proj_dim = guidance_cond_proj_dim
-        self.text_len = text_len
-        self.x_embedder = nn.Linear(in_channels, hidden_size, bias=True)
-        self.t_embedder = TimestepEmbedder(hidden_size, hidden_size * 4, cond_proj_dim=guidance_cond_proj_dim)
-        self.blocks = nn.ModuleList([
-            DiTBlock(hidden_size=hidden_size,
-                            c_emb_size=hidden_size,
-                            num_heads=num_heads,
-                            mlp_ratio=mlp_ratio,
-                            text_states_dim=context_dim,
-                            qk_norm=qk_norm,
-                            norm_layer=self.norm,
-                            qk_norm_layer=self.qk_norm,
-                            skip_connection=layer > depth // 2,
-                            qkv_bias=qkv_bias,
-                            use_moe=True if depth - layer <= num_moe_layers else False,
-                            num_experts=num_experts,
-                            moe_top_k=moe_top_k
-                            )
-            for layer in range(depth)
-        ])
-        self.depth = depth
-        self.final_layer = FinalLayer(hidden_size, self.out_channels)
-    def forward(self, x, t, contexts, **kwargs):
-        cond = contexts['main']
-        t = self.t_embedder(t, condition=kwargs.get('guidance_cond'))
-        x = self.x_embedder(x)
-        c = t
-        ##########################################
-        head_dim = self.blocks[0].attn1.head_dim
-        num_cond_tokens = c.shape[1] if c.dim() == 3 else 1
-        device = x.device
-        cond_cos = torch.ones(x.shape[0], num_cond_tokens, head_dim, device=device)
-        cond_sin = torch.zeros(x.shape[0], num_cond_tokens, head_dim, device=device)
-        voxel_cond = kwargs.get('voxel_cond')
-        # rotary_cos_vox, rotary_sin_vox = precompute_freqs_cis_3d(head_dim, voxel_cond)
-        rotary_cos_vox, rotary_sin_vox = precompute_freqs_cis_3d_interpolated(
-            head_dim, voxel_cond, current_res=self.voxel_query_res)
-        rotary_cos = torch.cat([cond_cos, rotary_cos_vox], dim=1)
-        rotary_sin = torch.cat([cond_sin, rotary_sin_vox], dim=1)
-        ##########################################
-        x = torch.cat([c, x], dim=1)
-        skip_value_list = []
-        for layer, block in enumerate(self.blocks):
-            skip_value = None if layer <= self.depth // 2 else skip_value_list.pop()
-            x = block(x, c, cond, rotary_cos=rotary_cos, rotary_sin=rotary_sin, skip_value=skip_value)
-            if layer < self.depth // 2:
-                skip_value_list.append(x)
-        x = self.final_layer(x)
-        return x
-def apply_rotary_emb(x, cos, sin):
-    """
-    x: [B, H, N, D]
-    cos, sin: [B, N, D]
-    """
-    cos = cos.unsqueeze(1)
-    sin = sin.unsqueeze(1)
-    def rotate_half(x):
-        x1, x2 = x.chunk(2, dim=-1)
-        return torch.cat((-x2, x1), dim=-1)
-    return (x * cos) + (rotate_half(x) * sin)
-def precompute_freqs_cis_3d(dim: int, grid_indices: torch.Tensor, theta: float = 10000.0):
-    """
-    grid_indices: [B, N, 3] voxel idx
-    """
-    dim_x = dim // 3
-    dim_y = dim // 3
-    dim_z = dim - dim_x - dim_y
-    device = grid_indices.device
-    freqs_x = 1.0 / (theta ** (torch.arange(0, dim_x, 2, device=device).float() / dim_x))
-    freqs_y = 1.0 / (theta ** (torch.arange(0, dim_y, 2, device=device).float() / dim_y))
-    freqs_z = 1.0 / (theta ** (torch.arange(0, dim_z, 2, device=device).float() / dim_z))
-    x_idx = grid_indices[..., 0].float()
-    y_idx = grid_indices[..., 1].float()
-    z_idx = grid_indices[..., 2].float()
-    args_x = x_idx.unsqueeze(-1) * freqs_x.unsqueeze(0).unsqueeze(0)
-    args_y = y_idx.unsqueeze(-1) * freqs_y.unsqueeze(0).unsqueeze(0)
-    args_z = z_idx.unsqueeze(-1) * freqs_z.unsqueeze(0).unsqueeze(0)
-    args = torch.cat([args_x, args_y, args_z], dim=-1)
-    args = torch.cat([args, args], dim=-1)
-    return torch.cos(args), torch.sin(args)
-def precompute_freqs_cis_3d_interpolated(
-    dim: int,
-    grid_indices: torch.Tensor,
-    theta: float = 10000.0,
-    trained_res: float = 128.0,  # training resolution
-    current_res: float = 256.0,  # inference resolution
-):
-    scale_factor = current_res / trained_res
-    dim_x = dim // 3
-    dim_y = dim // 3
-    dim_z = dim - dim_x - dim_y
-    device = grid_indices.device
-    freqs_x = 1.0 / (theta ** (torch.arange(0, dim_x, 2, device=device).float() / dim_x))
-    freqs_y = 1.0 / (theta ** (torch.arange(0, dim_y, 2, device=device).float() / dim_y))
-    freqs_z = 1.0 / (theta ** (torch.arange(0, dim_z, 2, device=device).float() / dim_z))
-    num_freqs_x = dim_x // 2 + (dim_x % 2)
-    num_freqs_y = dim_y // 2 + (dim_y % 2)
-    target_len = dim // 2
-    freqs_x = freqs_x[:num_freqs_x]
-    freqs_y = freqs_y[:num_freqs_y]
-    freqs_z = freqs_z[:(target_len - len(freqs_x) - len(freqs_y))]
-    input_x = grid_indices[..., 0].float()
-    input_y = grid_indices[..., 1].float()
-    input_z = grid_indices[..., 2].float()
-    # Apply Scaling
-    pos_x = input_x / scale_factor
-    pos_y = input_y / scale_factor
-    pos_z = input_z / scale_factor
-    # pos * freq
-    args_x = pos_x.unsqueeze(-1) * freqs_x.unsqueeze(0).unsqueeze(0)
-    args_y = pos_y.unsqueeze(-1) * freqs_y.unsqueeze(0).unsqueeze(0)
-    args_z = pos_z.unsqueeze(-1) * freqs_z.unsqueeze(0).unsqueeze(0)
-    args = torch.cat([args_x, args_y, args_z], dim=-1)
-    args = torch.cat([args, args], dim=-1)
-    return torch.cos(args), torch.sin(args)

ultrashape/models/denoisers/moe_layers.py DELETED Viewed

@@ -1,177 +0,0 @@
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import torch
-import torch.nn as nn
-import numpy as np
-import math
-from timm.models.vision_transformer import PatchEmbed, Attention, Mlp
-import torch.nn.functional as F
-from diffusers.models.attention import FeedForward
-class AddAuxiliaryLoss(torch.autograd.Function):
-    """
-    The trick function of adding auxiliary (aux) loss,
-    which includes the gradient of the aux loss during backpropagation.
-    """
-    @staticmethod
-    def forward(ctx, x, loss):
-        assert loss.numel() == 1
-        ctx.dtype = loss.dtype
-        ctx.required_aux_loss = loss.requires_grad
-        return x
-    @staticmethod
-    def backward(ctx, grad_output):
-        grad_loss = None
-        if ctx.required_aux_loss:
-            grad_loss = torch.ones(1, dtype=ctx.dtype, device=grad_output.device)
-        return grad_output, grad_loss
-class MoEGate(nn.Module):
-    def __init__(self, embed_dim, num_experts=16, num_experts_per_tok=2, aux_loss_alpha=0.01):
-        super().__init__()
-        self.top_k = num_experts_per_tok
-        self.n_routed_experts = num_experts
-        self.scoring_func = 'softmax'
-        self.alpha = aux_loss_alpha
-        self.seq_aux = False
-        # topk selection algorithm
-        self.norm_topk_prob = False
-        self.gating_dim = embed_dim
-        self.weight = nn.Parameter(torch.empty((self.n_routed_experts, self.gating_dim)))
-        self.reset_parameters()
-    def reset_parameters(self) -> None:
-        import torch.nn.init  as init
-        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
-    def forward(self, hidden_states):
-        bsz, seq_len, h = hidden_states.shape
-        # print(bsz, seq_len, h)
-        ### compute gating score
-        hidden_states = hidden_states.view(-1, h)
-        logits = F.linear(hidden_states, self.weight, None)
-        if self.scoring_func == 'softmax':
-            scores = logits.softmax(dim=-1)
-        else:
-            raise NotImplementedError(f'insupportable scoring function for MoE gating: {self.scoring_func}')
-        ### select top-k experts
-        topk_weight, topk_idx = torch.topk(scores, k=self.top_k, dim=-1, sorted=False)
-        ### norm gate to sum 1
-        if self.top_k > 1 and self.norm_topk_prob:
-            denominator = topk_weight.sum(dim=-1, keepdim=True) + 1e-20
-            topk_weight = topk_weight / denominator
-        ### expert-level computation auxiliary loss
-        if self.training and self.alpha > 0.0:
-            scores_for_aux = scores
-            aux_topk = self.top_k
-            # always compute aux loss based on the naive greedy topk method
-            topk_idx_for_aux_loss = topk_idx.view(bsz, -1)
-            if self.seq_aux:
-                scores_for_seq_aux = scores_for_aux.view(bsz, seq_len, -1)
-                ce = torch.zeros(bsz, self.n_routed_experts, device=hidden_states.device)
-                ce.scatter_add_(
-                    1,
-                    topk_idx_for_aux_loss,
-                    torch.ones(
-                        bsz, seq_len * aux_topk,
-                        device=hidden_states.device
-                    )
-                ).div_(seq_len * aux_topk / self.n_routed_experts)
-                aux_loss = (ce * scores_for_seq_aux.mean(dim = 1)).sum(dim = 1).mean()
-                aux_loss = aux_loss * self.alpha
-            else:
-                mask_ce = F.one_hot(topk_idx_for_aux_loss.view(-1),
-                                    num_classes=self.n_routed_experts)
-                ce = mask_ce.float().mean(0)
-                Pi = scores_for_aux.mean(0)
-                fi = ce * self.n_routed_experts
-                aux_loss = (Pi * fi).sum() * self.alpha
-        else:
-            aux_loss = None
-        return topk_idx, topk_weight, aux_loss
-class MoEBlock(nn.Module):
-    def __init__(self, dim, num_experts=8, moe_top_k=2,
-                    activation_fn = "gelu", dropout=0.0, final_dropout = False,
-                    ff_inner_dim = None, ff_bias = True):
-        super().__init__()
-        self.moe_top_k = moe_top_k
-        self.experts = nn.ModuleList([
-                FeedForward(dim,dropout=dropout,
-                            activation_fn=activation_fn,
-                            final_dropout=final_dropout,
-                            inner_dim=ff_inner_dim,
-                            bias=ff_bias)
-        for i in range(num_experts)])
-        self.gate = MoEGate(embed_dim=dim, num_experts=num_experts, num_experts_per_tok=moe_top_k)
-        self.shared_experts = FeedForward(dim,dropout=dropout, activation_fn=activation_fn,
-                                          final_dropout=final_dropout,  inner_dim=ff_inner_dim,
-                                          bias=ff_bias)
-    def initialize_weight(self):
-        pass
-    def forward(self, hidden_states):
-        identity = hidden_states
-        orig_shape = hidden_states.shape
-        topk_idx, topk_weight, aux_loss = self.gate(hidden_states)
-        hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
-        flat_topk_idx = topk_idx.view(-1)
-        if self.training:
-            hidden_states = hidden_states.repeat_interleave(self.moe_top_k, dim=0)
-            y = torch.empty_like(hidden_states, dtype=hidden_states.dtype)
-            for i, expert in enumerate(self.experts):
-                tmp = expert(hidden_states[flat_topk_idx == i])
-                y[flat_topk_idx == i] = tmp.to(hidden_states.dtype)
-            y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1)
-            y =  y.view(*orig_shape)
-            y = AddAuxiliaryLoss.apply(y, aux_loss)
-        else:
-            y = self.moe_infer(hidden_states, flat_topk_idx, topk_weight.view(-1, 1)).view(*orig_shape)
-        y = y + self.shared_experts(identity)
-        return y
-    @torch.no_grad()
-    def moe_infer(self, x, flat_expert_indices, flat_expert_weights):
-        expert_cache = torch.zeros_like(x)
-        idxs = flat_expert_indices.argsort()
-        tokens_per_expert = flat_expert_indices.bincount().cpu().numpy().cumsum(0)
-        token_idxs = idxs // self.moe_top_k
-        for i, end_idx in enumerate(tokens_per_expert):
-            start_idx = 0 if i == 0 else tokens_per_expert[i-1]
-            if start_idx == end_idx:
-                continue
-            expert = self.experts[i]
-            exp_token_idx = token_idxs[start_idx:end_idx]
-            expert_tokens = x[exp_token_idx]
-            expert_out = expert(expert_tokens)
-            expert_out.mul_(flat_expert_weights[idxs[start_idx:end_idx]])
-            # for fp16 and other dtype
-            expert_cache = expert_cache.to(expert_out.dtype)
-            expert_cache.scatter_reduce_(0, exp_token_idx.view(-1, 1).repeat(1, x.shape[-1]),
-                                         expert_out,
-                                         reduce='sum')
-        return expert_cache

ultrashape/models/diffusion/flow_matching_dit_trainer.py DELETED Viewed

@@ -1,313 +0,0 @@
-import os
-from contextlib import contextmanager
-from typing import List, Tuple, Optional, Union
-import torch
-import torch.nn as nn
-from torch.optim import lr_scheduler
-import pytorch_lightning as pl
-from pytorch_lightning.utilities import rank_zero_info
-from pytorch_lightning.utilities import rank_zero_only
-from ultrashape.pipelines import export_to_trimesh
-from ...utils.ema import LitEma
-from ...utils.misc import instantiate_from_config, instantiate_non_trainable_model, instantiate_vae_model, instantiate_vae_model_local
-class Diffuser(pl.LightningModule):
-    def __init__(
-        self,
-        *,
-        vae_config,
-        cond_config,
-        dit_cfg,
-        scheduler_cfg,
-        optimizer_cfg,
-        pipeline_cfg=None,
-        image_processor_cfg=None,
-        lora_config=None,
-        ema_config=None,
-        scale_by_std: bool = False,
-        z_scale_factor: float = 1.0,
-        ckpt_path: Optional[str] = None,
-        ignore_keys: Union[Tuple[str], List[str]] = (),
-        torch_compile: bool = False,
-    ):
-        super().__init__()
-        # ========= init optimizer config ========= #
-        self.optimizer_cfg = optimizer_cfg
-        # ========= init diffusion scheduler ========= #
-        self.scheduler_cfg = scheduler_cfg
-        self.sampler = None
-        if 'transport' in scheduler_cfg:
-            self.transport = instantiate_from_config(scheduler_cfg.transport)
-            self.sampler = instantiate_from_config(scheduler_cfg.sampler, transport=self.transport)
-            self.sample_fn = self.sampler.sample_ode(**scheduler_cfg.sampler.ode_params)
-        # ========= init the model ========= #
-        self.dit_cfg = dit_cfg
-        self.model = instantiate_from_config(dit_cfg, device=None, dtype=None)
-        self.cond_stage_model = instantiate_from_config(cond_config)
-        self.ckpt_path = ckpt_path
-        if ckpt_path is not None:
-            self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
-        # ========= config lora model ========= #
-        if lora_config is not None:
-            from peft import LoraConfig, get_peft_model
-            loraconfig = LoraConfig(
-                r=lora_config.rank,
-                lora_alpha=lora_config.rank,
-                target_modules=lora_config.get('target_modules')
-            )
-            self.model = get_peft_model(self.model, loraconfig)
-        # ========= config ema model ========= #
-        self.ema_config = ema_config
-        if self.ema_config is not None:
-            if self.ema_config.ema_model == 'DSEma':
-                # from michelangelo.models.modules.ema_deepspeed import DSEma
-                from ..utils.ema_deepspeed import DSEma
-                self.model_ema = DSEma(self.model, decay=self.ema_config.ema_decay)
-            else:
-                self.model_ema = LitEma(self.model, decay=self.ema_config.ema_decay)
-            #do not initilize EMA weight from ckpt path, since I need to change moe layers
-            if ckpt_path is not None:
-                self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
-            print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
-        # ========= init vae at last to prevent it is overridden by loaded ckpt ========= #
-        self.first_stage_model = instantiate_vae_model_local(vae_config)
-        self.first_stage_model.enable_flashvdm_decoder()
-        self.scale_by_std = scale_by_std
-        if scale_by_std:
-            self.register_buffer("z_scale_factor", torch.tensor(z_scale_factor))
-        else:
-            self.z_scale_factor = z_scale_factor
-        # ========= init pipeline for inference ========= #
-        self.image_processor_cfg = image_processor_cfg
-        self.image_processor = None
-        if self.image_processor_cfg is not None:
-            self.image_processor = instantiate_from_config(self.image_processor_cfg)
-        self.pipeline_cfg = pipeline_cfg
-        from ...schedulers import FlowMatchEulerDiscreteScheduler
-        scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000)
-        self.pipeline = instantiate_from_config(
-            pipeline_cfg,
-            vae=self.first_stage_model,
-            model=self.model,
-            scheduler=scheduler,
-            conditioner=self.cond_stage_model,
-            image_processor=self.image_processor,
-        )
-        # ========= torch compile to accelerate ========= #
-        self.torch_compile = torch_compile
-        if self.torch_compile:
-            torch.nn.Module.compile(self.model)
-            torch.nn.Module.compile(self.first_stage_model)
-            torch.nn.Module.compile(self.cond_stage_model)
-            print(f'*' * 100)
-            print(f'Compile model for acceleration')
-            print(f'*' * 100)
-    @contextmanager
-    def ema_scope(self, context=None):
-        if self.ema_config is not None and self.ema_config.get('ema_inference', False):
-            self.model_ema.store(self.model)
-            self.model_ema.copy_to(self.model)
-            if context is not None:
-                print(f"{context}: Switched to EMA weights")
-        try:
-            yield None
-        finally:
-            if self.ema_config is not None and self.ema_config.get('ema_inference', False):
-                self.model_ema.restore(self.model)
-                if context is not None:
-                    print(f"{context}: Restored training weights")
-    def init_from_ckpt(self, path, ignore_keys=()):
-        ckpt = torch.load(path, map_location="cpu")
-        if 'state_dict' not in ckpt:
-            # deepspeed ckpt
-            state_dict = {}
-            for k in ckpt.keys():
-                new_k = k.replace('_forward_module.', '')
-                state_dict[new_k] = ckpt[k]
-        else:
-            state_dict = ckpt["state_dict"]
-        keys = list(state_dict.keys())
-        for k in keys:
-            for ik in ignore_keys:
-                if ik in k:
-                    print("Deleting key {} from state_dict.".format(k))
-                    del state_dict[k]
-        missing, unexpected = self.load_state_dict(state_dict, strict=False)
-        print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
-        if len(missing) > 0:
-            print(f"Missing Keys: {missing}")
-            print(f"Unexpected Keys: {unexpected}")
-    def on_load_checkpoint(self, checkpoint):
-        """
-        The pt_model is trained separately, so we already have access to its
-        checkpoint and load it separately with `self.set_pt_model`.
-        However, the PL Trainer is strict about
-        checkpoint loading (not configurable), so it expects the loaded state_dict
-        to match exactly the keys in the model state_dict.
-        So, when loading the checkpoint, before matching keys, we add all pt_model keys
-        from self.state_dict() to the checkpoint state dict, so that they match
-        """
-        for key in self.state_dict().keys():
-            if key.startswith("model_ema") and key not in checkpoint["state_dict"]:
-                checkpoint["state_dict"][key] = self.state_dict()[key]
-    def configure_optimizers(self) -> Tuple[List, List]:
-        lr = self.learning_rate
-        params_list = []
-        trainable_parameters = list(self.model.parameters())
-        params_list.append({'params': trainable_parameters, 'lr': lr})
-        no_decay = ['bias', 'norm.weight', 'norm.bias', 'norm1.weight', 'norm1.bias', 'norm2.weight', 'norm2.bias']
-        if self.optimizer_cfg.get('train_image_encoder', False):
-            image_encoder_parameters = list(self.cond_stage_model.named_parameters())
-            image_encoder_parameters_decay = [param for name, param in image_encoder_parameters if
-                                              not any((no_decay_name in name) for no_decay_name in no_decay)]
-            image_encoder_parameters_nodecay = [param for name, param in image_encoder_parameters if
-                                                any((no_decay_name in name) for no_decay_name in no_decay)]
-            # filter trainable params
-            image_encoder_parameters_decay = [param for param in image_encoder_parameters_decay if
-                                              param.requires_grad]
-            image_encoder_parameters_nodecay = [param for param in image_encoder_parameters_nodecay if
-                                                param.requires_grad]
-            print(f"Image Encoder Params: {len(image_encoder_parameters_decay)} decay, ")
-            print(f"Image Encoder Params: {len(image_encoder_parameters_nodecay)} nodecay, ")
-            image_encoder_lr = self.optimizer_cfg['image_encoder_lr']
-            image_encoder_lr_multiply = self.optimizer_cfg.get('image_encoder_lr_multiply', 1.0)
-            image_encoder_lr = image_encoder_lr if image_encoder_lr is not None else lr * image_encoder_lr_multiply
-            params_list.append(
-                {'params': image_encoder_parameters_decay, 'lr': image_encoder_lr,
-                 'weight_decay': 0.05})
-            params_list.append(
-                {'params': image_encoder_parameters_nodecay, 'lr': image_encoder_lr,
-                 'weight_decay': 0.})
-        optimizer = instantiate_from_config(self.optimizer_cfg.optimizer, params=params_list, lr=lr)
-        if hasattr(self.optimizer_cfg, 'scheduler'):
-            scheduler_func = instantiate_from_config(
-                self.optimizer_cfg.scheduler,
-                max_decay_steps=self.trainer.max_steps,
-                lr_max=lr
-            )
-            scheduler = {
-                "scheduler": lr_scheduler.LambdaLR(optimizer, lr_lambda=scheduler_func.schedule),
-                "interval": "step",
-                "frequency": 1
-            }
-            schedulers = [scheduler]
-        else:
-            schedulers = []
-        optimizers = [optimizer]
-        return optimizers, schedulers
-    def on_train_batch_end(self, *args, **kwargs):
-        if self.ema_config is not None:
-            self.model_ema(self.model)
-    def on_train_epoch_start(self) -> None:
-        pl.seed_everything(self.trainer.global_rank)
-    def forward(self, batch, disable_drop):
-        with torch.autocast(device_type="cuda", dtype=torch.bfloat16): #float32 for text
-            contexts = self.cond_stage_model(image=batch.get('image'), text=batch.get('text'), mask=batch.get('mask'), disable_drop=disable_drop)
-        with torch.autocast(device_type="cuda", dtype=torch.float16):
-            with torch.no_grad():
-                latents, voxel_idx = self.first_stage_model.encode(batch["surface"], sample_posterior=True, need_voxel=True)
-                latents = self.z_scale_factor * latents
-                # print(latents.shape)
-        with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
-            loss = self.transport.training_losses(self.model, latents,
-                    dict(contexts=contexts, voxel_cond=voxel_idx))["loss"].mean()
-        return loss
-    def training_step(self, batch, batch_idx, optimizer_idx=0):
-        loss = self.forward(batch, disable_drop=False)
-        split = 'train'
-        loss_dict = {
-            f"{split}/total_loss": loss.detach(),
-            f"{split}/lr_abs": self.optimizers().param_groups[0]['lr'],
-        }
-        self.log_dict(loss_dict, prog_bar=True, logger=True, sync_dist=False, rank_zero_only=True)
-        return loss
-    def validation_step(self, batch, batch_idx, optimizer_idx=0):
-        loss = self.forward(batch, disable_drop=True)
-        split = 'val'
-        loss_dict = {
-            f"{split}/total_loss": loss.detach(),
-            f"{split}/lr_abs": self.optimizers().param_groups[0]['lr'],
-        }
-        self.log_dict(loss_dict, prog_bar=True, logger=True, sync_dist=False, rank_zero_only=True)
-        return loss
-    @torch.no_grad()
-    def sample(self, batch, output_type='trimesh', **kwargs):
-        self.cond_stage_model.disable_drop = True
-        generator = torch.Generator().manual_seed(0)
-        with self.ema_scope("Sample"):
-            with torch.amp.autocast(device_type='cuda'):
-                try:
-                    self.pipeline.device = self.device
-                    self.pipeline.dtype = self.dtype
-                    print("### USING PIPELINE ###")
-                    print(f'device: {self.device} dtype : {self.dtype}')
-                    additional_params = {'output_type':output_type}
-                    image = batch.get("image", None)
-                    mask = batch.get('mask', None)
-                    outputs = self.pipeline(image=image,
-                                            mask=mask,
-                                            generator=generator,
-                                            box_v=1.0,
-                                            mc_level=0.0,
-                                            octree_resolution=1024,
-                                            **additional_params)
-                except Exception as e:
-                    import traceback
-                    traceback.print_exc()
-                    print(f"Unexpected {e=}, {type(e)=}")
-                    with open("error.txt", "a") as f:
-                        f.write(str(e))
-                        f.write(traceback.format_exc())
-                        f.write("\n")
-                    outputs = [None]
-        self.cond_stage_model.disable_drop = False
-        return [outputs]

ultrashape/models/diffusion/transport/__init__.py DELETED Viewed

@@ -1,97 +0,0 @@
-# This file includes code derived from the SiT project (https://github.com/willisma/SiT),
-# which is licensed under the MIT License.
-#
-# MIT License
-#
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-from .transport import Transport, ModelType, WeightType, PathType, Sampler
-def create_transport(
-    path_type='Linear',
-    prediction="velocity",
-    loss_weight=None,
-    train_eps=None,
-    sample_eps=None,
-    train_sample_type="uniform",
-    mean = 0.0,
-    std = 1.0,
-    shift_scale = 1.0,
-):
-    """function for creating Transport object
-    **Note**: model prediction defaults to velocity
-    Args:
-    - path_type: type of path to use; default to linear
-    - learn_score: set model prediction to score
-    - learn_noise: set model prediction to noise
-    - velocity_weighted: weight loss by velocity weight
-    - likelihood_weighted: weight loss by likelihood weight
-    - train_eps: small epsilon for avoiding instability during training
-    - sample_eps: small epsilon for avoiding instability during sampling
-    """
-    if prediction == "noise":
-        model_type = ModelType.NOISE
-    elif prediction == "score":
-        model_type = ModelType.SCORE
-    else:
-        model_type = ModelType.VELOCITY
-    if loss_weight == "velocity":
-        loss_type = WeightType.VELOCITY
-    elif loss_weight == "likelihood":
-        loss_type = WeightType.LIKELIHOOD
-    else:
-        loss_type = WeightType.NONE
-    path_choice = {
-        "Linear": PathType.LINEAR,
-        "GVP": PathType.GVP,
-        "VP": PathType.VP,
-    }
-    path_type = path_choice[path_type]
-    if (path_type in [PathType.VP]):
-        train_eps = 1e-5 if train_eps is None else train_eps
-        sample_eps = 1e-3 if train_eps is None else sample_eps
-    elif (path_type in [PathType.GVP, PathType.LINEAR] and model_type != ModelType.VELOCITY):
-        train_eps = 1e-3 if train_eps is None else train_eps
-        sample_eps = 1e-3 if train_eps is None else sample_eps
-    else:  # velocity & [GVP, LINEAR] is stable everywhere
-        train_eps = 0
-        sample_eps = 0
-    # create flow state
-    state = Transport(
-        model_type=model_type,
-        path_type=path_type,
-        loss_type=loss_type,
-        train_eps=train_eps,
-        sample_eps=sample_eps,
-        train_sample_type=train_sample_type,
-        mean=mean,
-        std=std,
-        shift_scale =shift_scale,
-    )
-    return state

ultrashape/models/diffusion/transport/integrators.py DELETED Viewed

@@ -1,142 +0,0 @@
-# This file includes code derived from the SiT project (https://github.com/willisma/SiT),
-# which is licensed under the MIT License.
-#
-# MIT License
-#
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-import numpy as np
-import torch as th
-import torch.nn as nn
-from torchdiffeq import odeint
-from functools import partial
-from tqdm import tqdm
-class sde:
-    """SDE solver class"""
-    def __init__(
-        self,
-        drift,
-        diffusion,
-        *,
-        t0,
-        t1,
-        num_steps,
-        sampler_type,
-    ):
-        assert t0 < t1, "SDE sampler has to be in forward time"
-        self.num_timesteps = num_steps
-        self.t = th.linspace(t0, t1, num_steps)
-        self.dt = self.t[1] - self.t[0]
-        self.drift = drift
-        self.diffusion = diffusion
-        self.sampler_type = sampler_type
-    def __Euler_Maruyama_step(self, x, mean_x, t, model, **model_kwargs):
-        w_cur = th.randn(x.size()).to(x)
-        t = th.ones(x.size(0)).to(x) * t
-        dw = w_cur * th.sqrt(self.dt)
-        drift = self.drift(x, t, model, **model_kwargs)
-        diffusion = self.diffusion(x, t)
-        mean_x = x + drift * self.dt
-        x = mean_x + th.sqrt(2 * diffusion) * dw
-        return x, mean_x
-    def __Heun_step(self, x, _, t, model, **model_kwargs):
-        w_cur = th.randn(x.size()).to(x)
-        dw = w_cur * th.sqrt(self.dt)
-        t_cur = th.ones(x.size(0)).to(x) * t
-        diffusion = self.diffusion(x, t_cur)
-        xhat = x + th.sqrt(2 * diffusion) * dw
-        K1 = self.drift(xhat, t_cur, model, **model_kwargs)
-        xp = xhat + self.dt * K1
-        K2 = self.drift(xp, t_cur + self.dt, model, **model_kwargs)
-        return xhat + 0.5 * self.dt * (K1 + K2), xhat # at last time point we do not perform the heun step
-    def __forward_fn(self):
-        """TODO: generalize here by adding all private functions ending with steps to it"""
-        sampler_dict = {
-            "Euler": self.__Euler_Maruyama_step,
-            "Heun": self.__Heun_step,
-        }
-        try:
-            sampler = sampler_dict[self.sampler_type]
-        except:
-            raise NotImplementedError("Smapler type not implemented.")
-        return sampler
-    def sample(self, init, model, **model_kwargs):
-        """forward loop of sde"""
-        x = init
-        mean_x = init
-        samples = []
-        sampler = self.__forward_fn()
-        for ti in self.t[:-1]:
-            with th.no_grad():
-                x, mean_x = sampler(x, mean_x, ti, model, **model_kwargs)
-                samples.append(x)
-        return samples
-class ode:
-    """ODE solver class"""
-    def __init__(
-        self,
-        drift,
-        *,
-        t0,
-        t1,
-        sampler_type,
-        num_steps,
-        atol,
-        rtol,
-    ):
-        assert t0 < t1, "ODE sampler has to be in forward time"
-        self.drift = drift
-        self.t = th.linspace(t0, t1, num_steps)
-        self.atol = atol
-        self.rtol = rtol
-        self.sampler_type = sampler_type
-    def sample(self, x, model, **model_kwargs):
-        device = x[0].device if isinstance(x, tuple) else x.device
-        def _fn(t, x):
-            t = th.ones(x[0].size(0)).to(device) * t if isinstance(x, tuple) else th.ones(x.size(0)).to(device) * t
-            model_output = self.drift(x, t, model, **model_kwargs)
-            return model_output
-        t = self.t.to(device)
-        atol = [self.atol] * len(x) if isinstance(x, tuple) else [self.atol]
-        rtol = [self.rtol] * len(x) if isinstance(x, tuple) else [self.rtol]
-        samples = odeint(
-            _fn,
-            x,
-            t,
-            method=self.sampler_type,
-            atol=atol,
-            rtol=rtol
-        )
-        return samples

ultrashape/models/diffusion/transport/path.py DELETED Viewed

@@ -1,220 +0,0 @@
-# This file includes code derived from the SiT project (https://github.com/willisma/SiT),
-# which is licensed under the MIT License.
-#
-# MIT License
-#
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-import torch as th
-import numpy as np
-from functools import partial
-def expand_t_like_x(t, x):
-    """Function to reshape time t to broadcastable dimension of x
-    Args:
-      t: [batch_dim,], time vector
-      x: [batch_dim,...], data point
-    """
-    dims = [1] * (len(x.size()) - 1)
-    t = t.view(t.size(0), *dims)
-    return t
-#################### Coupling Plans ####################
-class ICPlan:
-    """Linear Coupling Plan"""
-    def __init__(self, sigma=0.0):
-        self.sigma = sigma
-    def compute_alpha_t(self, t):
-        """Compute the data coefficient along the path"""
-        return t, 1
-    def compute_sigma_t(self, t):
-        """Compute the noise coefficient along the path"""
-        return 1 - t, -1
-    def compute_d_alpha_alpha_ratio_t(self, t):
-        """Compute the ratio between d_alpha and alpha"""
-        return 1 / t
-    def compute_drift(self, x, t):
-        """We always output sde according to score parametrization; """
-        t = expand_t_like_x(t, x)
-        alpha_ratio = self.compute_d_alpha_alpha_ratio_t(t)
-        sigma_t, d_sigma_t = self.compute_sigma_t(t)
-        drift = alpha_ratio * x
-        diffusion = alpha_ratio * (sigma_t ** 2) - sigma_t * d_sigma_t
-        return -drift, diffusion
-    def compute_diffusion(self, x, t, form="constant", norm=1.0):
-        """Compute the diffusion term of the SDE
-        Args:
-          x: [batch_dim, ...], data point
-          t: [batch_dim,], time vector
-          form: str, form of the diffusion term
-          norm: float, norm of the diffusion term
-        """
-        t = expand_t_like_x(t, x)
-        choices = {
-            "constant": norm,
-            "SBDM": norm * self.compute_drift(x, t)[1],
-            "sigma": norm * self.compute_sigma_t(t)[0],
-            "linear": norm * (1 - t),
-            "decreasing": 0.25 * (norm * th.cos(np.pi * t) + 1) ** 2,
-            "inccreasing-decreasing": norm * th.sin(np.pi * t) ** 2,
-        }
-        try:
-            diffusion = choices[form]
-        except KeyError:
-            raise NotImplementedError(f"Diffusion form {form} not implemented")
-        return diffusion
-    def get_score_from_velocity(self, velocity, x, t):
-        """Wrapper function: transfrom velocity prediction model to score
-        Args:
-            velocity: [batch_dim, ...] shaped tensor; velocity model output
-            x: [batch_dim, ...] shaped tensor; x_t data point
-            t: [batch_dim,] time tensor
-        """
-        t = expand_t_like_x(t, x)
-        alpha_t, d_alpha_t = self.compute_alpha_t(t)
-        sigma_t, d_sigma_t = self.compute_sigma_t(t)
-        mean = x
-        reverse_alpha_ratio = alpha_t / d_alpha_t
-        var = sigma_t**2 - reverse_alpha_ratio * d_sigma_t * sigma_t
-        score = (reverse_alpha_ratio * velocity - mean) / var
-        return score
-    def get_noise_from_velocity(self, velocity, x, t):
-        """Wrapper function: transfrom velocity prediction model to denoiser
-        Args:
-            velocity: [batch_dim, ...] shaped tensor; velocity model output
-            x: [batch_dim, ...] shaped tensor; x_t data point
-            t: [batch_dim,] time tensor
-        """
-        t = expand_t_like_x(t, x)
-        alpha_t, d_alpha_t = self.compute_alpha_t(t)
-        sigma_t, d_sigma_t = self.compute_sigma_t(t)
-        mean = x
-        reverse_alpha_ratio = alpha_t / d_alpha_t
-        var = reverse_alpha_ratio * d_sigma_t - sigma_t
-        noise = (reverse_alpha_ratio * velocity - mean) / var
-        return noise
-    def get_velocity_from_score(self, score, x, t):
-        """Wrapper function: transfrom score prediction model to velocity
-        Args:
-            score: [batch_dim, ...] shaped tensor; score model output
-            x: [batch_dim, ...] shaped tensor; x_t data point
-            t: [batch_dim,] time tensor
-        """
-        t = expand_t_like_x(t, x)
-        drift, var = self.compute_drift(x, t)
-        velocity = var * score - drift
-        return velocity
-    def compute_mu_t(self, t, x0, x1):
-        """Compute the mean of time-dependent density p_t"""
-        t = expand_t_like_x(t, x1)
-        alpha_t, _ = self.compute_alpha_t(t)
-        sigma_t, _ = self.compute_sigma_t(t)
-        # t*x1 + (1-t)*x0 ; t=0 x0; t=1 x1
-        return alpha_t * x1 + sigma_t * x0
-    def compute_xt(self, t, x0, x1):
-        """Sample xt from time-dependent density p_t; rng is required"""
-        xt = self.compute_mu_t(t, x0, x1)
-        return xt
-    def compute_ut(self, t, x0, x1, xt):
-        """Compute the vector field corresponding to p_t"""
-        t = expand_t_like_x(t, x1)
-        _, d_alpha_t = self.compute_alpha_t(t)
-        _, d_sigma_t = self.compute_sigma_t(t)
-        return d_alpha_t * x1 + d_sigma_t * x0
-    def plan(self, t, x0, x1):
-        xt = self.compute_xt(t, x0, x1)
-        ut = self.compute_ut(t, x0, x1, xt)
-        return t, xt, ut
-class VPCPlan(ICPlan):
-    """class for VP path flow matching"""
-    def __init__(self, sigma_min=0.1, sigma_max=20.0):
-        self.sigma_min = sigma_min
-        self.sigma_max = sigma_max
-        self.log_mean_coeff = lambda t: -0.25 * ((1 - t) ** 2) * \
-            (self.sigma_max - self.sigma_min) - 0.5 * (1 - t) * self.sigma_min
-        self.d_log_mean_coeff = lambda t: 0.5 * (1 - t) * \
-            (self.sigma_max - self.sigma_min) + 0.5 * self.sigma_min
-    def compute_alpha_t(self, t):
-        """Compute coefficient of x1"""
-        alpha_t = self.log_mean_coeff(t)
-        alpha_t = th.exp(alpha_t)
-        d_alpha_t = alpha_t * self.d_log_mean_coeff(t)
-        return alpha_t, d_alpha_t
-    def compute_sigma_t(self, t):
-        """Compute coefficient of x0"""
-        p_sigma_t = 2 * self.log_mean_coeff(t)
-        sigma_t = th.sqrt(1 - th.exp(p_sigma_t))
-        d_sigma_t = th.exp(p_sigma_t) * (2 * self.d_log_mean_coeff(t)) / (-2 * sigma_t)
-        return sigma_t, d_sigma_t
-    def compute_d_alpha_alpha_ratio_t(self, t):
-        """Special purposed function for computing numerical stabled d_alpha_t / alpha_t"""
-        return self.d_log_mean_coeff(t)
-    def compute_drift(self, x, t):
-        """Compute the drift term of the SDE"""
-        t = expand_t_like_x(t, x)
-        beta_t = self.sigma_min + (1 - t) * (self.sigma_max - self.sigma_min)
-        return -0.5 * beta_t * x, beta_t / 2
-class GVPCPlan(ICPlan):
-    def __init__(self, sigma=0.0):
-        super().__init__(sigma)
-    def compute_alpha_t(self, t):
-        """Compute coefficient of x1"""
-        alpha_t = th.sin(t * np.pi / 2)
-        d_alpha_t = np.pi / 2 * th.cos(t * np.pi / 2)
-        return alpha_t, d_alpha_t
-    def compute_sigma_t(self, t):
-        """Compute coefficient of x0"""
-        sigma_t = th.cos(t * np.pi / 2)
-        d_sigma_t = -np.pi / 2 * th.sin(t * np.pi / 2)
-        return sigma_t, d_sigma_t
-    def compute_d_alpha_alpha_ratio_t(self, t):
-        """Special purposed function for computing numerical stabled d_alpha_t / alpha_t"""
-        return np.pi / (2 * th.tan(t * np.pi / 2))

ultrashape/models/diffusion/transport/transport.py DELETED Viewed

@@ -1,534 +0,0 @@
-# This file includes code derived from the SiT project (https://github.com/willisma/SiT),
-# which is licensed under the MIT License.
-#
-# MIT License
-#
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-import torch as th
-import numpy as np
-import logging
-import enum
-from . import path
-from .utils import EasyDict, log_state, mean_flat
-from .integrators import ode, sde
-class ModelType(enum.Enum):
-    """
-    Which type of output the model predicts.
-    """
-    NOISE = enum.auto()  # the model predicts epsilon
-    SCORE = enum.auto()  # the model predicts \nabla \log p(x)
-    VELOCITY = enum.auto()  # the model predicts v(x)
-class PathType(enum.Enum):
-    """
-    Which type of path to use.
-    """
-    LINEAR = enum.auto()
-    GVP = enum.auto()
-    VP = enum.auto()
-class WeightType(enum.Enum):
-    """
-    Which type of weighting to use.
-    """
-    NONE = enum.auto()
-    VELOCITY = enum.auto()
-    LIKELIHOOD = enum.auto()
-class Transport:
-    def __init__(
-        self,
-        *,
-        model_type,
-        path_type,
-        loss_type,
-        train_eps,
-        sample_eps,
-        train_sample_type = "uniform",
-        **kwargs,
-    ):
-        path_options = {
-            PathType.LINEAR: path.ICPlan,
-            PathType.GVP: path.GVPCPlan,
-            PathType.VP: path.VPCPlan,
-        }
-        self.loss_type = loss_type
-        self.model_type = model_type
-        self.path_sampler = path_options[path_type]()
-        self.train_eps = train_eps
-        self.sample_eps = sample_eps
-        self.train_sample_type = train_sample_type
-        if self.train_sample_type == "logit_normal":
-            self.mean = kwargs['mean']
-            self.std = kwargs['std']
-            self.shift_scale = kwargs['shift_scale']
-            print(f"using logit normal sample, shift scale is {self.shift_scale}")
-    def prior_logp(self, z):
-        '''
-            Standard multivariate normal prior
-            Assume z is batched
-        '''
-        shape = th.tensor(z.size())
-        N = th.prod(shape[1:])
-        _fn = lambda x: -N / 2. * np.log(2 * np.pi) - th.sum(x ** 2) / 2.
-        return th.vmap(_fn)(z)
-    def check_interval(
-        self,
-        train_eps,
-        sample_eps,
-        *,
-        diffusion_form="SBDM",
-        sde=False,
-        reverse=False,
-        eval=False,
-        last_step_size=0.0,
-    ):
-        t0 = 0
-        t1 = 1
-        eps = train_eps if not eval else sample_eps
-        if (type(self.path_sampler) in [path.VPCPlan]):
-            t1 = 1 - eps if (not sde or last_step_size == 0) else 1 - last_step_size
-        elif (type(self.path_sampler) in [path.ICPlan, path.GVPCPlan]) \
-            and (
-            self.model_type != ModelType.VELOCITY or sde):  # avoid numerical issue by taking a first semi-implicit step
-            t0 = eps if (diffusion_form == "SBDM" and sde) or self.model_type != ModelType.VELOCITY else 0
-            t1 = 1 - eps if (not sde or last_step_size == 0) else 1 - last_step_size
-        if reverse:
-            t0, t1 = 1 - t0, 1 - t1
-        return t0, t1
-    def sample(self, x1):
-        """Sampling x0 & t based on shape of x1 (if needed)
-          Args:
-            x1 - data point; [batch, *dim]
-        """
-        x0 = th.randn_like(x1)
-        if self.train_sample_type=="uniform":
-            t0, t1 = self.check_interval(self.train_eps, self.sample_eps)
-            t = th.rand((x1.shape[0],)) * (t1 - t0) + t0
-            t = t.to(x1)
-        elif self.train_sample_type=="logit_normal":
-            t = th.randn((x1.shape[0],)) * self.std + self.mean
-            t = t.to(x1)
-            t = 1/(1+th.exp(-t))
-            t = np.sqrt(self.shift_scale)*t/(1+(np.sqrt(self.shift_scale)-1)*t)
-        return t, x0, x1
-    def training_losses(
-        self,
-        model,
-        x1,
-        model_kwargs=None
-    ):
-        """Loss for training the score model
-        Args:
-        - model: backbone model; could be score, noise, or velocity
-        - x1: datapoint
-        - model_kwargs: additional arguments for the model
-        """
-        if model_kwargs == None:
-            model_kwargs = {}
-        t, x0, x1 = self.sample(x1)
-        t, xt, ut = self.path_sampler.plan(t, x0, x1)
-        model_output = model(xt, t, **model_kwargs)
-        B, *_, C = xt.shape
-        assert model_output.size() == (B, *xt.size()[1:-1], C)
-        terms = {}
-        terms['pred'] = model_output
-        if self.model_type == ModelType.VELOCITY:
-            terms['loss'] = mean_flat(((model_output - ut) ** 2))
-        else:
-            _, drift_var = self.path_sampler.compute_drift(xt, t)
-            sigma_t, _ = self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, xt))
-            if self.loss_type in [WeightType.VELOCITY]:
-                weight = (drift_var / sigma_t) ** 2
-            elif self.loss_type in [WeightType.LIKELIHOOD]:
-                weight = drift_var / (sigma_t ** 2)
-            elif self.loss_type in [WeightType.NONE]:
-                weight = 1
-            else:
-                raise NotImplementedError()
-            if self.model_type == ModelType.NOISE:
-                terms['loss'] = mean_flat(weight * ((model_output - x0) ** 2))
-            else:
-                terms['loss'] = mean_flat(weight * ((model_output * sigma_t + x0) ** 2))
-        return terms
-    def get_drift(
-        self
-    ):
-        """member function for obtaining the drift of the probability flow ODE"""
-        def score_ode(x, t, model, **model_kwargs):
-            drift_mean, drift_var = self.path_sampler.compute_drift(x, t)
-            model_output = model(x, t, **model_kwargs)
-            return (-drift_mean + drift_var * model_output)  # by change of variable
-        def noise_ode(x, t, model, **model_kwargs):
-            drift_mean, drift_var = self.path_sampler.compute_drift(x, t)
-            sigma_t, _ = self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))
-            model_output = model(x, t, **model_kwargs)
-            score = model_output / -sigma_t
-            return (-drift_mean + drift_var * score)
-        def velocity_ode(x, t, model, **model_kwargs):
-            model_output = model(x, t, **model_kwargs)
-            return model_output
-        if self.model_type == ModelType.NOISE:
-            drift_fn = noise_ode
-        elif self.model_type == ModelType.SCORE:
-            drift_fn = score_ode
-        else:
-            drift_fn = velocity_ode
-        def body_fn(x, t, model, **model_kwargs):
-            model_output = drift_fn(x, t, model, **model_kwargs)
-            assert model_output.shape == x.shape, "Output shape from ODE solver must match input shape"
-            return model_output
-        return body_fn
-    def get_score(
-        self,
-    ):
-        """member function for obtaining score of
-            x_t = alpha_t * x + sigma_t * eps"""
-        if self.model_type == ModelType.NOISE:
-            score_fn = lambda x, t, model, **kwargs: model(x, t, **kwargs) / - \
-                self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))[0]
-        elif self.model_type == ModelType.SCORE:
-            score_fn = lambda x, t, model, **kwagrs: model(x, t, **kwagrs)
-        elif self.model_type == ModelType.VELOCITY:
-            score_fn = lambda x, t, model, **kwargs: self.path_sampler.get_score_from_velocity(model(x, t, **kwargs), x,
-                                                                                               t)
-        else:
-            raise NotImplementedError()
-        return score_fn
-class Sampler:
-    """Sampler class for the transport model"""
-    def __init__(
-        self,
-        transport,
-    ):
-        """Constructor for a general sampler; supporting different sampling methods
-        Args:
-        - transport: an tranport object specify model prediction & interpolant type
-        """
-        self.transport = transport
-        self.drift = self.transport.get_drift()
-        self.score = self.transport.get_score()
-    def __get_sde_diffusion_and_drift(
-        self,
-        *,
-        diffusion_form="SBDM",
-        diffusion_norm=1.0,
-    ):
-        def diffusion_fn(x, t):
-            diffusion = self.transport.path_sampler.compute_diffusion(x, t, form=diffusion_form, norm=diffusion_norm)
-            return diffusion
-        sde_drift = \
-            lambda x, t, model, **kwargs: \
-                self.drift(x, t, model, **kwargs) + diffusion_fn(x, t) * self.score(x, t, model, **kwargs)
-        sde_diffusion = diffusion_fn
-        return sde_drift, sde_diffusion
-    def __get_last_step(
-        self,
-        sde_drift,
-        *,
-        last_step,
-        last_step_size,
-    ):
-        """Get the last step function of the SDE solver"""
-        if last_step is None:
-            last_step_fn = \
-                lambda x, t, model, **model_kwargs: \
-                    x
-        elif last_step == "Mean":
-            last_step_fn = \
-                lambda x, t, model, **model_kwargs: \
-                    x + sde_drift(x, t, model, **model_kwargs) * last_step_size
-        elif last_step == "Tweedie":
-            alpha = self.transport.path_sampler.compute_alpha_t  # simple aliasing; the original name was too long
-            sigma = self.transport.path_sampler.compute_sigma_t
-            last_step_fn = \
-                lambda x, t, model, **model_kwargs: \
-                    x / alpha(t)[0][0] + (sigma(t)[0][0] ** 2) / alpha(t)[0][0] * self.score(x, t, model,
-                                                                                             **model_kwargs)
-        elif last_step == "Euler":
-            last_step_fn = \
-                lambda x, t, model, **model_kwargs: \
-                    x + self.drift(x, t, model, **model_kwargs) * last_step_size
-        else:
-            raise NotImplementedError()
-        return last_step_fn
-    def sample_sde(
-        self,
-        *,
-        sampling_method="Euler",
-        diffusion_form="SBDM",
-        diffusion_norm=1.0,
-        last_step="Mean",
-        last_step_size=0.04,
-        num_steps=250,
-    ):
-        """returns a sampling function with given SDE settings
-        Args:
-        - sampling_method: type of sampler used in solving the SDE; default to be Euler-Maruyama
-        - diffusion_form: function form of diffusion coefficient; default to be matching SBDM
-        - diffusion_norm: function magnitude of diffusion coefficient; default to 1
-        - last_step: type of the last step; default to identity
-        - last_step_size: size of the last step; default to match the stride of 250 steps over [0,1]
-        - num_steps: total integration step of SDE
-        """
-        if last_step is None:
-            last_step_size = 0.0
-        sde_drift, sde_diffusion = self.__get_sde_diffusion_and_drift(
-            diffusion_form=diffusion_form,
-            diffusion_norm=diffusion_norm,
-        )
-        t0, t1 = self.transport.check_interval(
-            self.transport.train_eps,
-            self.transport.sample_eps,
-            diffusion_form=diffusion_form,
-            sde=True,
-            eval=True,
-            reverse=False,
-            last_step_size=last_step_size,
-        )
-        _sde = sde(
-            sde_drift,
-            sde_diffusion,
-            t0=t0,
-            t1=t1,
-            num_steps=num_steps,
-            sampler_type=sampling_method
-        )
-        last_step_fn = self.__get_last_step(sde_drift, last_step=last_step, last_step_size=last_step_size)
-        def _sample(init, model, **model_kwargs):
-            xs = _sde.sample(init, model, **model_kwargs)
-            ts = th.ones(init.size(0), device=init.device) * t1
-            x = last_step_fn(xs[-1], ts, model, **model_kwargs)
-            xs.append(x)
-            assert len(xs) == num_steps, "Samples does not match the number of steps"
-            return xs
-        return _sample
-    def sample_ode(
-        self,
-        *,
-        sampling_method="dopri5",
-        num_steps=50,
-        atol=1e-6,
-        rtol=1e-3,
-        reverse=False,
-    ):
-        """returns a sampling function with given ODE settings
-        Args:
-        - sampling_method: type of sampler used in solving the ODE; default to be Dopri5
-        - num_steps:
-            - fixed solver (Euler, Heun): the actual number of integration steps performed
-            - adaptive solver (Dopri5): the number of datapoints saved during integration; produced by interpolation
-        - atol: absolute error tolerance for the solver
-        - rtol: relative error tolerance for the solver
-        - reverse: whether solving the ODE in reverse (data to noise); default to False
-        """
-        if reverse:
-            drift = lambda x, t, model, **kwargs: self.drift(x, th.ones_like(t) * (1 - t), model, **kwargs)
-        else:
-            drift = self.drift
-        t0, t1 = self.transport.check_interval(
-            self.transport.train_eps,
-            self.transport.sample_eps,
-            sde=False,
-            eval=True,
-            reverse=reverse,
-            last_step_size=0.0,
-        )
-        _ode = ode(
-            drift=drift,
-            t0=t0,
-            t1=t1,
-            sampler_type=sampling_method,
-            num_steps=num_steps,
-            atol=atol,
-            rtol=rtol,
-        )
-        return _ode.sample
-    def sample_ode_intermediate(
-        self,
-        *,
-        sampling_method="dopri5",
-        num_steps=50,
-        atol=1e-6,
-        rtol=1e-3,
-        t=0.5,
-        reverse=False,
-    ):
-        """returns a sampling function with given ODE settings
-        Args:
-        - sampling_method: type of sampler used in solving the ODE; default to be Dopri5
-        - num_steps:
-            - fixed solver (Euler, Heun): the actual number of integration steps performed
-            - adaptive solver (Dopri5): the number of datapoints saved during integration; produced by interpolation
-        - atol: absolute error tolerance for the solver
-        - rtol: relative error tolerance for the solver
-        - reverse: whether solving the ODE in reverse (data to noise); default to False
-        """
-        if reverse:
-            drift = lambda x, t, model, **kwargs: self.drift(x, th.ones_like(t) * (1 - t), model, **kwargs)
-        else:
-            drift = self.drift
-        t0, t1 = self.transport.check_interval(
-            self.transport.train_eps,
-            self.transport.sample_eps,
-            sde=False,
-            eval=True,
-            reverse=reverse,
-            last_step_size=0.0,
-        )
-        _ode = ode(
-            drift=drift,
-            t0=t,
-            t1=t1,
-            sampler_type=sampling_method,
-            num_steps=num_steps,
-            atol=atol,
-            rtol=rtol,
-        )
-        return _ode.sample
-    def sample_ode_likelihood(
-        self,
-        *,
-        sampling_method="dopri5",
-        num_steps=50,
-        atol=1e-6,
-        rtol=1e-3,
-    ):
-        """returns a sampling function for calculating likelihood with given ODE settings
-        Args:
-        - sampling_method: type of sampler used in solving the ODE; default to be Dopri5
-        - num_steps:
-            - fixed solver (Euler, Heun): the actual number of integration steps performed
-            - adaptive solver (Dopri5): the number of datapoints saved during integration; produced by interpolation
-        - atol: absolute error tolerance for the solver
-        - rtol: relative error tolerance for the solver
-        """
-        def _likelihood_drift(x, t, model, **model_kwargs):
-            x, _ = x
-            eps = th.randint(2, x.size(), dtype=th.float, device=x.device) * 2 - 1
-            t = th.ones_like(t) * (1 - t)
-            with th.enable_grad():
-                x.requires_grad = True
-                grad = th.autograd.grad(th.sum(self.drift(x, t, model, **model_kwargs) * eps), x)[0]
-                logp_grad = th.sum(grad * eps, dim=tuple(range(1, len(x.size()))))
-                drift = self.drift(x, t, model, **model_kwargs)
-            return (-drift, logp_grad)
-        t0, t1 = self.transport.check_interval(
-            self.transport.train_eps,
-            self.transport.sample_eps,
-            sde=False,
-            eval=True,
-            reverse=False,
-            last_step_size=0.0,
-        )
-        _ode = ode(
-            drift=_likelihood_drift,
-            t0=t0,
-            t1=t1,
-            sampler_type=sampling_method,
-            num_steps=num_steps,
-            atol=atol,
-            rtol=rtol,
-        )
-        def _sample_fn(x, model, **model_kwargs):
-            init_logp = th.zeros(x.size(0)).to(x)
-            input = (x, init_logp)
-            drift, delta_logp = _ode.sample(input, model, **model_kwargs)
-            drift, delta_logp = drift[-1], delta_logp[-1]
-            prior_logp = self.transport.prior_logp(drift)
-            logp = prior_logp - delta_logp
-            return logp, drift
-        return _sample_fn

ultrashape/models/diffusion/transport/utils.py DELETED Viewed

@@ -1,54 +0,0 @@
-# This file includes code derived from the SiT project (https://github.com/willisma/SiT),
-# which is licensed under the MIT License.
-#
-# MIT License
-#
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-import torch as th
-class EasyDict:
-    def __init__(self, sub_dict):
-        for k, v in sub_dict.items():
-            setattr(self, k, v)
-    def __getitem__(self, key):
-        return getattr(self, key)
-def mean_flat(x):
-    """
-    Take the mean over all non-batch dimensions.
-    """
-    return th.mean(x, dim=list(range(1, len(x.size()))))
-def log_state(state):
-    result = []
-    sorted_state = dict(sorted(state.items()))
-    for key, value in sorted_state.items():
-        # Check if the value is an instance of a class
-        if "<object" in str(value) or "object at" in str(value):
-            result.append(f"{key}: [{value.__class__.__name__}]")
-        else:
-            result.append(f"{key}: {value}")
-    return '\n'.join(result)

ultrashape/pipelines.py DELETED Viewed

@@ -1,797 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import copy
-import importlib
-import inspect
-import os
-from typing import List, Optional, Union
-import numpy as np
-import torch
-import trimesh
-import yaml
-from PIL import Image
-from diffusers.utils.torch_utils import randn_tensor
-from diffusers.utils.import_utils import is_accelerate_version, is_accelerate_available
-from tqdm import tqdm
-from .models.autoencoders import ShapeVAE
-from .models.autoencoders import SurfaceExtractors
-from .utils import logger, synchronize_timer, smart_load_model
-def retrieve_timesteps(
-    scheduler,
-    num_inference_steps: Optional[int] = None,
-    device: Optional[Union[str, torch.device]] = None,
-    timesteps: Optional[List[int]] = None,
-    sigmas: Optional[List[float]] = None,
-    **kwargs,
-):
-    """
-    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
-    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
-    Args:
-        scheduler (`SchedulerMixin`):
-            The scheduler to get timesteps from.
-        num_inference_steps (`int`):
-            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
-            must be `None`.
-        device (`str` or `torch.device`, *optional*):
-            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
-        timesteps (`List[int]`, *optional*):
-            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
-            `num_inference_steps` and `sigmas` must be `None`.
-        sigmas (`List[float]`, *optional*):
-            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
-            `num_inference_steps` and `timesteps` must be `None`.
-    Returns:
-        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
-        second element is the number of inference steps.
-    """
-    if timesteps is not None and sigmas is not None:
-        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
-    if timesteps is not None:
-        accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
-        if not accepts_timesteps:
-            raise ValueError(
-                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
-                f" timestep schedules. Please check whether you are using the correct scheduler."
-            )
-        scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
-        timesteps = scheduler.timesteps
-        num_inference_steps = len(timesteps)
-    elif sigmas is not None:
-        accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
-        if not accept_sigmas:
-            raise ValueError(
-                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
-                f" sigmas schedules. Please check whether you are using the correct scheduler."
-            )
-        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
-        timesteps = scheduler.timesteps
-        num_inference_steps = len(timesteps)
-    else:
-        scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
-        timesteps = scheduler.timesteps
-    return timesteps, num_inference_steps
-@synchronize_timer('Export to trimesh')
-def export_to_trimesh(mesh_output):
-    if isinstance(mesh_output, list):
-        outputs = []
-        for mesh in mesh_output:
-            if mesh is None:
-                outputs.append(None)
-            else:
-                mesh.mesh_f = mesh.mesh_f[:, ::-1]
-                mesh_output = trimesh.Trimesh(mesh.mesh_v, mesh.mesh_f)
-                outputs.append(mesh_output)
-        return outputs
-    else:
-        mesh_output.mesh_f = mesh_output.mesh_f[:, ::-1]
-        mesh_output = trimesh.Trimesh(mesh_output.mesh_v, mesh_output.mesh_f)
-        return mesh_output
-def get_obj_from_str(string, reload=False):
-    module, cls = string.rsplit(".", 1)
-    if reload:
-        module_imp = importlib.import_module(module)
-        importlib.reload(module_imp)
-    return getattr(importlib.import_module(module, package=None), cls)
-def instantiate_from_config(config, **kwargs):
-    if "target" not in config:
-        raise KeyError("Expected key `target` to instantiate.")
-    cls = get_obj_from_str(config["target"])
-    params = config.get("params", dict())
-    kwargs.update(params)
-    instance = cls(**kwargs)
-    return instance
-class DiTPipeline:
-    model_cpu_offload_seq = "conditioner->model->vae"
-    _exclude_from_cpu_offload = []
-    @classmethod
-    @synchronize_timer('DiTPipeline Model Loading')
-    def from_single_file(
-        cls,
-        ckpt_path,
-        config_path,
-        device='cuda',
-        dtype=torch.float16,
-        use_safetensors=None,
-        **kwargs,
-    ):
-        # load config
-        with open(config_path, 'r') as f:
-            config = yaml.safe_load(f)
-        # load ckpt
-        if use_safetensors:
-            ckpt_path = ckpt_path.replace('.ckpt', '.safetensors')
-        if not os.path.exists(ckpt_path):
-            raise FileNotFoundError(f"Model file {ckpt_path} not found")
-        logger.info(f"Loading model from {ckpt_path}")
-        if use_safetensors:
-            # parse safetensors
-            import safetensors.torch
-            safetensors_ckpt = safetensors.torch.load_file(ckpt_path, device='cpu')
-            ckpt = {}
-            for key, value in safetensors_ckpt.items():
-                model_name = key.split('.')[0]
-                new_key = key[len(model_name) + 1:]
-                if model_name not in ckpt:
-                    ckpt[model_name] = {}
-                ckpt[model_name][new_key] = value
-        else:
-            ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True)
-        # load model
-        model = instantiate_from_config(config['model'])
-        model.load_state_dict(ckpt['model'])
-        vae = instantiate_from_config(config['vae'])
-        vae.load_state_dict(ckpt['vae'], strict=False)
-        conditioner = instantiate_from_config(config['conditioner'])
-        if 'conditioner' in ckpt:
-            conditioner.load_state_dict(ckpt['conditioner'])
-        image_processor = instantiate_from_config(config['image_processor'])
-        scheduler = instantiate_from_config(config['scheduler'])
-        model_kwargs = dict(
-            vae=vae,
-            model=model,
-            scheduler=scheduler,
-            conditioner=conditioner,
-            image_processor=image_processor,
-            device=device,
-            dtype=dtype,
-        )
-        model_kwargs.update(kwargs)
-        return cls(
-            **model_kwargs
-        )
-    @classmethod
-    def from_pretrained(
-        cls,
-        model_path,
-        device='cuda',
-        dtype=torch.float16,
-        use_safetensors=False,
-        variant='fp16',
-        subfolder='hunyuan3d-dit-v2-1',
-        **kwargs,
-    ):
-        kwargs['from_pretrained_kwargs'] = dict(
-            model_path=model_path,
-            subfolder=subfolder,
-            use_safetensors=use_safetensors,
-            variant=variant,
-            dtype=dtype,
-            device=device,
-        )
-        config_path, ckpt_path = smart_load_model(
-            model_path,
-            subfolder=subfolder,
-            use_safetensors=use_safetensors,
-            variant=variant
-        )
-        return cls.from_single_file(
-            ckpt_path,
-            config_path,
-            device=device,
-            dtype=dtype,
-            use_safetensors=use_safetensors,
-            **kwargs
-        )
-    def __init__(
-        self,
-        vae,
-        model,
-        scheduler,
-        conditioner,
-        image_processor,
-        device='cuda',
-        dtype=torch.float16,
-        ref_model=None,
-        **kwargs
-    ):
-        self.vae = vae
-        self.model = model
-        self.ref_model = ref_model
-        self.scheduler = scheduler
-        self.conditioner = conditioner
-        self.image_processor = image_processor
-        self.kwargs = kwargs
-        self.components = {
-            "vae": vae,
-            "model": model,
-            "scheduler": scheduler,
-            "conditioner": conditioner,
-            "image_processor": image_processor,
-        }
-        if ref_model is not None:
-             self.components["ref_model"] = ref_model
-        self.to(device, dtype)
-    def compile(self):
-        self.vae = torch.compile(self.vae)
-        self.model = torch.compile(self.model)
-        self.conditioner = torch.compile(self.conditioner)
-    def enable_flashvdm(
-        self,
-        enabled: bool = True,
-        adaptive_kv_selection=True,
-        topk_mode='mean',
-        mc_algo='mc',
-        replace_vae=True,
-    ):
-        if enabled:
-            self.vae.enable_flashvdm_decoder(
-                enabled=enabled,
-                adaptive_kv_selection=adaptive_kv_selection,
-                topk_mode=topk_mode,
-                mc_algo=mc_algo
-            )
-        else:
-            self.vae.enable_flashvdm_decoder(enabled=False)
-    def to(self, device=None, dtype=None):
-        if dtype is not None:
-            self.dtype = dtype
-            self.vae.to(dtype=dtype)
-            self.model.to(dtype=dtype)
-            self.conditioner.to(dtype=dtype)
-        if device is not None:
-            self.device = torch.device(device)
-            self.vae.to(device)
-            self.model.to(device)
-            self.conditioner.to(device)
-    @property
-    def _execution_device(self):
-        r"""
-        Returns the device on which the pipeline's models will be executed. After calling
-        [`~DiffusionPipeline.enable_sequential_cpu_offload`] the execution device can only be inferred from
-        Accelerate's module hooks.
-        """
-        for name, model in self.components.items():
-            if not isinstance(model, torch.nn.Module) or name in self._exclude_from_cpu_offload:
-                continue
-            if not hasattr(model, "_hf_hook"):
-                return self.device
-            for module in model.modules():
-                if (
-                    hasattr(module, "_hf_hook")
-                    and hasattr(module._hf_hook, "execution_device")
-                    and module._hf_hook.execution_device is not None
-                ):
-                    return torch.device(module._hf_hook.execution_device)
-        return self.device
-    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
-        r"""
-        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
-        Arguments:
-            gpu_id (`int`, *optional*):
-                The ID of the accelerator that shall be used in inference. If not specified, it will default to 0.
-            device (`torch.Device` or `str`, *optional*, defaults to "cuda"):
-                The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
-                default to "cuda".
-        """
-        if self.model_cpu_offload_seq is None:
-            raise ValueError(
-                "Model CPU offload cannot be enabled because no `model_cpu_offload_seq` class attribute is set."
-            )
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-        torch_device = torch.device(device)
-        device_index = torch_device.index
-        if gpu_id is not None and device_index is not None:
-            raise ValueError(
-                f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}"
-                f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of "
-                f"the device: `device`={torch_device.type}"
-            )
-        # _offload_gpu_id should be set to passed gpu_id (or id in passed `device`)
-        # or default to previously set id or default to 0
-        self._offload_gpu_id = gpu_id or torch_device.index or getattr(self, "_offload_gpu_id", 0)
-        device_type = torch_device.type
-        device = torch.device(f"{device_type}:{self._offload_gpu_id}")
-        if self.device.type != "cpu":
-            self.to("cpu")
-            device_mod = getattr(torch, self.device.type, None)
-            if hasattr(device_mod, "empty_cache") and device_mod.is_available():
-                device_mod.empty_cache()
-                # otherwise we don't see the memory savings (but they probably exist)
-        all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)}
-        self._all_hooks = []
-        hook = None
-        for model_str in self.model_cpu_offload_seq.split("->"):
-            model = all_model_components.pop(model_str, None)
-            if not isinstance(model, torch.nn.Module):
-                continue
-            _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook)
-            self._all_hooks.append(hook)
-        # CPU offload models that are not in the seq chain unless they are explicitly excluded
-        # these models will stay on CPU until maybe_free_model_hooks is called
-        # some models cannot be in the seq chain because they are iteratively called,
-        # such as controlnet
-        for name, model in all_model_components.items():
-            if not isinstance(model, torch.nn.Module):
-                continue
-            if name in self._exclude_from_cpu_offload:
-                model.to(device)
-            else:
-                _, hook = cpu_offload_with_hook(model, device)
-                self._all_hooks.append(hook)
-    def maybe_free_model_hooks(self):
-        r"""
-        Function that offloads all components, removes all model hooks that were added when using
-        `enable_model_cpu_offload` and then applies them again. In case the model has not been offloaded this function
-        is a no-op. Make sure to add this function to the end of the `__call__` function of your pipeline so that it
-        functions correctly when applying enable_model_cpu_offload.
-        """
-        if not hasattr(self, "_all_hooks") or len(self._all_hooks) == 0:
-            # `enable_model_cpu_offload` has not be called, so silently do nothing
-            return
-        for hook in self._all_hooks:
-            # offload model and remove hook from model
-            hook.offload()
-            hook.remove()
-        # make sure the model is in the same state as before calling it
-        self.enable_model_cpu_offload()
-    @synchronize_timer('Encode cond')
-    def encode_cond(self, image, additional_cond_inputs, do_classifier_free_guidance, dual_guidance):
-        bsz = image.shape[0]
-        cond = self.conditioner(image=image, **additional_cond_inputs)  # cond['main'].shape
-        if do_classifier_free_guidance:
-            cond_token_num = cond["main"].shape[1]
-            additional_cond_inputs["num_tokens"] = cond_token_num
-            un_cond = self.conditioner.unconditional_embedding(bsz, **additional_cond_inputs)
-            if dual_guidance:
-                un_cond_drop_main = copy.deepcopy(un_cond)
-                un_cond_drop_main['additional'] = cond['additional']
-                def cat_recursive(a, b, c):
-                    if isinstance(a, torch.Tensor):
-                        return torch.cat([a, b, c], dim=0).to(self.dtype)
-                    out = {}
-                    for k in a.keys():
-                        out[k] = cat_recursive(a[k], b[k], c[k])
-                    return out
-                cond = cat_recursive(cond, un_cond_drop_main, un_cond)
-            else:
-                def cat_recursive(a, b):
-                    if isinstance(a, torch.Tensor):
-                        return torch.cat([a, b], dim=0).to(self.dtype)
-                    out = {}
-                    for k in a.keys():
-                        out[k] = cat_recursive(a[k], b[k])
-                    return out
-                cond = cat_recursive(cond, un_cond)
-        return cond
-    def prepare_extra_step_kwargs(self, generator, eta):
-        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
-        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
-        # and should be between [0, 1]
-        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
-        extra_step_kwargs = {}
-        if accepts_eta:
-            extra_step_kwargs["eta"] = eta
-        # check if the scheduler accepts generator
-        accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
-        if accepts_generator:
-            extra_step_kwargs["generator"] = generator
-        return extra_step_kwargs
-    def prepare_latents(self, batch_size, dtype, device, generator, latents=None, shape=None):
-        if shape is None:
-            shape = (batch_size, *self.vae.latent_shape)
-        else:
-            shape = (batch_size, *shape)
-        if isinstance(generator, list) and len(generator) != batch_size:
-            raise ValueError(
-                f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
-                f" size of {batch_size}. Make sure the batch size matches the length of the generators."
-            )
-        if latents is None:
-            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
-        else:
-            latents = latents.to(device)
-        # scale the initial noise by the standard deviation required by the scheduler
-        latents = latents * getattr(self.scheduler, 'init_noise_sigma', 1.0)
-        return latents
-    def prepare_image(self, image, mask=None) -> dict:
-        if isinstance(image, torch.Tensor) and isinstance(mask, torch.Tensor):
-            outputs = {
-                'image': image,
-                'mask': mask
-            }
-            return outputs
-        if isinstance(image, str) and not os.path.exists(image):
-            raise FileNotFoundError(f"Couldn't find image at path {image}")
-        if not isinstance(image, list):
-            image = [image]
-        outputs = []
-        for img in image:
-            output = self.image_processor(img)  # output['image'].shape
-            outputs.append(output)
-        cond_input = {k: [] for k in outputs[0].keys()}
-        for output in outputs:
-            for key, value in output.items():
-                cond_input[key].append(value)
-        for key, value in cond_input.items():
-            if isinstance(value[0], torch.Tensor):
-                cond_input[key] = torch.cat(value, dim=0)
-        return cond_input
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
-        """
-        See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
-        Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
-            embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
-        Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
-        """
-        assert len(w.shape) == 1
-        w = w * 1000.0
-        half_dim = embedding_dim // 2
-        emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
-        emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
-        emb = w.to(dtype)[:, None] * emb[None, :]
-        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
-        if embedding_dim % 2 == 1:  # zero pad
-            emb = torch.nn.functional.pad(emb, (0, 1))
-        assert emb.shape == (w.shape[0], embedding_dim)
-        return emb
-    def set_surface_extractor(self, mc_algo):
-        if mc_algo is None:
-            return
-        logger.info('The parameters `mc_algo` is deprecated, and will be removed in future versions.\n'
-                    'Please use: \n'
-                    'from hy3dshape.models.autoencoders import SurfaceExtractors\n'
-                    'pipeline.vae.surface_extractor = SurfaceExtractors[mc_algo]() instead\n')
-        if mc_algo not in SurfaceExtractors.keys():
-            raise ValueError(f"Unknown mc_algo {mc_algo}")
-        self.vae.surface_extractor = SurfaceExtractors[mc_algo]()
-    @torch.no_grad()
-    def __call__(
-        self,
-        image: Union[str, List[str], Image.Image] = None,
-        num_inference_steps: int = 50,
-        timesteps: List[int] = None,
-        sigmas: List[float] = None,
-        eta: float = 0.0,
-        guidance_scale: float = 7.5,
-        dual_guidance_scale: float = 10.5,
-        dual_guidance: bool = True,
-        generator=None,
-        box_v=1.01,
-        octree_resolution=384,
-        mc_level=-1 / 512,
-        num_chunks=8000,
-        mc_algo=None,
-        output_type: Optional[str] = "trimesh",
-        enable_pbar=True,
-        **kwargs,
-    ) -> List[List[trimesh.Trimesh]]:
-        callback = kwargs.pop("callback", None)
-        callback_steps = kwargs.pop("callback_steps", None)
-        self.set_surface_extractor(mc_algo)
-        device = self.device
-        dtype = self.dtype
-        do_classifier_free_guidance = guidance_scale >= 0 and \
-                                      getattr(self.model, 'guidance_cond_proj_dim', None) is None
-        dual_guidance = dual_guidance_scale >= 0 and dual_guidance
-        if isinstance(image, torch.Tensor):
-            pass
-        else:
-            cond_inputs = self.prepare_image(image)
-            image = cond_inputs.pop('image')
-        cond = self.encode_cond(
-            image=image,
-            additional_cond_inputs=cond_inputs,
-            do_classifier_free_guidance=do_classifier_free_guidance,
-            dual_guidance=False,
-        )
-        batch_size = image.shape[0]
-        t_dtype = torch.long
-        timesteps, num_inference_steps = retrieve_timesteps(
-            self.scheduler, num_inference_steps, device, timesteps, sigmas)
-        latents = self.prepare_latents(batch_size, dtype, device, generator)
-        extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
-        guidance_cond = None
-        if getattr(self.model, 'guidance_cond_proj_dim', None) is not None:
-            logger.info('Using lcm guidance scale')
-            guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(batch_size)
-            guidance_cond = self.get_guidance_scale_embedding(
-                guidance_scale_tensor, embedding_dim=self.model.guidance_cond_proj_dim
-            ).to(device=device, dtype=latents.dtype)
-        with synchronize_timer('Diffusion Sampling'):
-            for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:", leave=False)):
-                # expand the latents if we are doing classifier free guidance
-                if do_classifier_free_guidance:
-                    latent_model_input = torch.cat([latents] * (3 if dual_guidance else 2))
-                else:
-                    latent_model_input = latents
-                latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
-                # predict the noise residual
-                timestep_tensor = torch.tensor([t], dtype=t_dtype, device=device)
-                timestep_tensor = timestep_tensor.expand(latent_model_input.shape[0])
-                noise_pred = self.model(latent_model_input, timestep_tensor, cond, guidance_cond=guidance_cond)
-                # no drop, drop clip, all drop
-                if do_classifier_free_guidance:
-                    if dual_guidance:
-                        noise_pred_clip, noise_pred_dino, noise_pred_uncond = noise_pred.chunk(3)
-                        noise_pred = (
-                            noise_pred_uncond
-                            + guidance_scale * (noise_pred_clip - noise_pred_dino)
-                            + dual_guidance_scale * (noise_pred_dino - noise_pred_uncond)
-                        )
-                    else:
-                        noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2)
-                        noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond)
-                # compute the previous noisy sample x_t -> x_t-1
-                outputs = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs)
-                latents = outputs.prev_sample
-                if callback is not None and i % callback_steps == 0:
-                    step_idx = i // getattr(self.scheduler, "order", 1)
-                    callback(step_idx, t, outputs)
-        return self._export(
-            latents,
-            output_type,
-            box_v, mc_level, num_chunks, octree_resolution, mc_algo,
-        )
-    def _export(
-        self,
-        latents,
-        output_type='trimesh',
-        box_v=1.01,
-        mc_level=0.0,
-        num_chunks=20000,
-        octree_resolution=256,
-        mc_algo='mc',
-        enable_pbar=True
-    ):
-        if not output_type == "latent":
-            latents = 1. / self.vae.scale_factor * latents
-            latents = self.vae(latents)
-            outputs, _ = self.vae.latents2mesh(
-                latents,
-                bounds=box_v,
-                mc_level=mc_level,
-                num_chunks=num_chunks,
-                octree_resolution=octree_resolution,
-                mc_algo=mc_algo,
-                enable_pbar=enable_pbar,
-            )
-        else:
-            outputs = latents
-        if output_type == 'trimesh':
-            outputs = export_to_trimesh(outputs)
-        return outputs
-class UltraShapePipeline(DiTPipeline):
-    @torch.inference_mode()
-    def __call__(
-        self,
-        image: Union[str, List[str], Image.Image, dict, List[dict], torch.Tensor] = None,
-        voxel_cond: torch.Tensor = None,
-        num_inference_steps: int = 50,
-        timesteps: List[int] = None,
-        sigmas: List[float] = None,
-        eta: float = 0.0,
-        guidance_scale: float = 5.0,
-        generator=None,
-        box_v=1.01,
-        octree_resolution=384,
-        mc_level=0.0,
-        mc_algo=None,
-        num_chunks=8000,
-        output_type: Optional[str] = "trimesh",
-        enable_pbar=True,
-        mask = None,
-        **kwargs,
-    ) -> List[List[trimesh.Trimesh]]:
-        callback = kwargs.pop("callback", None)
-        callback_steps = kwargs.pop("callback_steps", None)
-        self.set_surface_extractor(mc_algo)
-        device = self.device
-        dtype = self.dtype
-        do_classifier_free_guidance = guidance_scale >= 0 and not (
-            hasattr(self.model, 'guidance_embed') and
-            self.model.guidance_embed is True
-        )
-        # print('image', type(image), 'mask', type(mask))
-        cond_inputs = self.prepare_image(image, mask)
-        image = cond_inputs.pop('image')
-        cond = self.encode_cond(
-            image=image,
-            additional_cond_inputs=cond_inputs,
-            do_classifier_free_guidance=do_classifier_free_guidance,
-            dual_guidance=False,
-        )
-        batch_size = image.shape[0]
-        # 5. Prepare timesteps
-        # NOTE: this is slightly different from common usage, we start from 0.
-        sigmas = np.linspace(0, 1, num_inference_steps) if sigmas is None else sigmas
-        timesteps, num_inference_steps = retrieve_timesteps(
-            self.scheduler,
-            num_inference_steps,
-            device,
-            sigmas=sigmas,
-        )
-        latents_shape = None
-        if voxel_cond is not None:
-             # voxel_cond: [B, N, 3] -> [N, 3] if batched? No, it's [B, N, 3] usually
-             # The encoder expects [B, N, 3]
-             num_tokens = voxel_cond.shape[1]
-             latents_shape = (num_tokens, self.vae.latent_shape[-1])
-        latents = self.prepare_latents(batch_size, dtype, device, generator, shape=latents_shape)
-        guidance = None
-        if hasattr(self.model, 'guidance_embed') and \
-            self.model.guidance_embed is True:
-            guidance = torch.tensor([guidance_scale] * batch_size, device=device, dtype=dtype)
-            # logger.info(f'Using guidance embed with scale {guidance_scale}')
-        if do_classifier_free_guidance and voxel_cond is not None:
-            voxel_cond = torch.cat([voxel_cond] * 2)
-        with synchronize_timer('Diffusion Sampling'):
-            for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:")):
-                # expand the latents if we are doing classifier free guidance
-                if do_classifier_free_guidance:
-                    latent_model_input = torch.cat([latents] * 2)
-                else:
-                    latent_model_input = latents
-                # NOTE: we assume model get timesteps ranged from 0 to 1
-                timestep = t.expand(latent_model_input.shape[0]).to(latents.dtype).to(latent_model_input.device)
-                timestep = timestep / self.scheduler.config.num_train_timesteps
-                if voxel_cond is None:
-                    noise_pred = self.model(latent_model_input, timestep, cond, guidance=guidance)
-                else:
-                    noise_pred = self.model(latent_model_input, timestep, cond,
-                            guidance=guidance, voxel_cond=voxel_cond)
-                if do_classifier_free_guidance:
-                    noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2)
-                    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond)
-                # compute the previous noisy sample x_t -> x_t-1
-                outputs = self.scheduler.step(noise_pred, t, latents)
-                latents = outputs.prev_sample
-                if callback is not None and i % callback_steps == 0:
-                    step_idx = i // getattr(self.scheduler, "order", 1)
-                    callback(step_idx, t, outputs)
-        return self._export(
-            latents,
-            output_type,
-            box_v, mc_level, num_chunks, octree_resolution, mc_algo,
-            enable_pbar=enable_pbar,
-        ), latents

ultrashape/postprocessors.py DELETED Viewed

@@ -1,209 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import os
-import tempfile
-from typing import Union
-import numpy as np
-import pymeshlab
-import torch
-import trimesh
-from .models.autoencoders import Latent2MeshOutput
-from .utils import synchronize_timer
-def load_mesh(path):
-    if path.endswith(".glb"):
-        mesh = trimesh.load(path)
-    else:
-        mesh = pymeshlab.MeshSet()
-        mesh.load_new_mesh(path)
-    return mesh
-def reduce_face(mesh: pymeshlab.MeshSet, max_facenum: int = 200000):
-    if max_facenum > mesh.current_mesh().face_number():
-        return mesh
-    mesh.apply_filter(
-        "meshing_decimation_quadric_edge_collapse",
-        targetfacenum=max_facenum,
-        qualitythr=1.0,
-        preserveboundary=True,
-        boundaryweight=3,
-        preservenormal=True,
-        preservetopology=True,
-        autoclean=True
-    )
-    return mesh
-def remove_floater(mesh: pymeshlab.MeshSet):
-    mesh.apply_filter("compute_selection_by_small_disconnected_components_per_face",
-                      nbfaceratio=0.005)
-    mesh.apply_filter("compute_selection_transfer_face_to_vertex", inclusive=False)
-    mesh.apply_filter("meshing_remove_selected_vertices_and_faces")
-    return mesh
-def pymeshlab2trimesh(mesh: pymeshlab.MeshSet):
-    with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file:
-        mesh.save_current_mesh(temp_file.name)
-        mesh = trimesh.load(temp_file.name)
-    # 检查加载的对象类型
-    if isinstance(mesh, trimesh.Scene):
-        combined_mesh = trimesh.Trimesh()
-        # 如果是Scene，遍历所有的geometry并合并
-        for geom in mesh.geometry.values():
-            combined_mesh = trimesh.util.concatenate([combined_mesh, geom])
-        mesh = combined_mesh
-    return mesh
-def trimesh2pymeshlab(mesh: trimesh.Trimesh):
-    with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file:
-        if isinstance(mesh, trimesh.scene.Scene):
-            for idx, obj in enumerate(mesh.geometry.values()):
-                if idx == 0:
-                    temp_mesh = obj
-                else:
-                    temp_mesh = temp_mesh + obj
-            mesh = temp_mesh
-        mesh.export(temp_file.name)
-        mesh = pymeshlab.MeshSet()
-        mesh.load_new_mesh(temp_file.name)
-    return mesh
-def export_mesh(input, output):
-    if isinstance(input, pymeshlab.MeshSet):
-        mesh = output
-    elif isinstance(input, Latent2MeshOutput):
-        output = Latent2MeshOutput()
-        output.mesh_v = output.current_mesh().vertex_matrix()
-        output.mesh_f = output.current_mesh().face_matrix()
-        mesh = output
-    else:
-        mesh = pymeshlab2trimesh(output)
-    return mesh
-def import_mesh(mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str]) -> pymeshlab.MeshSet:
-    if isinstance(mesh, str):
-        mesh = load_mesh(mesh)
-    elif isinstance(mesh, Latent2MeshOutput):
-        mesh = pymeshlab.MeshSet()
-        mesh_pymeshlab = pymeshlab.Mesh(vertex_matrix=mesh.mesh_v, face_matrix=mesh.mesh_f)
-        mesh.add_mesh(mesh_pymeshlab, "converted_mesh")
-    if isinstance(mesh, (trimesh.Trimesh, trimesh.scene.Scene)):
-        mesh = trimesh2pymeshlab(mesh)
-    return mesh
-class FaceReducer:
-    @synchronize_timer('FaceReducer')
-    def __call__(
-        self,
-        mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str],
-        max_facenum: int = 40000
-    ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh]:
-        ms = import_mesh(mesh)
-        ms = reduce_face(ms, max_facenum=max_facenum)
-        mesh = export_mesh(mesh, ms)
-        return mesh
-class FloaterRemover:
-    @synchronize_timer('FloaterRemover')
-    def __call__(
-        self,
-        mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str],
-    ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]:
-        ms = import_mesh(mesh)
-        ms = remove_floater(ms)
-        mesh = export_mesh(mesh, ms)
-        return mesh
-class DegenerateFaceRemover:
-    @synchronize_timer('DegenerateFaceRemover')
-    def __call__(
-        self,
-        mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str],
-    ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]:
-        ms = import_mesh(mesh)
-        with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file:
-            ms.save_current_mesh(temp_file.name)
-            ms = pymeshlab.MeshSet()
-            ms.load_new_mesh(temp_file.name)
-        mesh = export_mesh(mesh, ms)
-        return mesh
-def mesh_normalize(mesh):
-    """
-    Normalize mesh vertices to sphere
-    """
-    scale_factor = 1.2
-    vtx_pos = np.asarray(mesh.vertices)
-    max_bb = (vtx_pos - 0).max(0)[0]
-    min_bb = (vtx_pos - 0).min(0)[0]
-    center = (max_bb + min_bb) / 2
-    scale = torch.norm(torch.tensor(vtx_pos - center, dtype=torch.float32), dim=1).max() * 2.0
-    vtx_pos = (vtx_pos - center) * (scale_factor / float(scale))
-    mesh.vertices = vtx_pos
-    return mesh
-class MeshSimplifier:
-    def __init__(self, executable: str = None):
-        if executable is None:
-            CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
-            executable = os.path.join(CURRENT_DIR, "mesh_simplifier.bin")
-        self.executable = executable
-    @synchronize_timer('MeshSimplifier')
-    def __call__(
-        self,
-        mesh: Union[trimesh.Trimesh],
-    ) -> Union[trimesh.Trimesh]:
-        with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_input:
-            with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_output:
-                mesh.export(temp_input.name)
-                os.system(f'{self.executable} {temp_input.name} {temp_output.name}')
-                ms = trimesh.load(temp_output.name, process=False)
-                if isinstance(ms, trimesh.Scene):
-                    combined_mesh = trimesh.Trimesh()
-                    for geom in ms.geometry.values():
-                        combined_mesh = trimesh.util.concatenate([combined_mesh, geom])
-                    ms = combined_mesh
-                ms = mesh_normalize(ms)
-                return ms

ultrashape/preprocessors.py DELETED Viewed

@@ -1,167 +0,0 @@
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import cv2
-import numpy as np
-import torch
-from PIL import Image
-from einops import repeat, rearrange
-def array_to_tensor(np_array):
-    image_pt = torch.tensor(np_array).float()
-    image_pt = image_pt / 255 * 2 - 1
-    image_pt = rearrange(image_pt, "h w c -> c h w")
-    image_pts = repeat(image_pt, "c h w -> b c h w", b=1)
-    return image_pts
-class ImageProcessorV2:
-    def __init__(self, size=512, border_ratio=None):
-        self.size = size
-        self.border_ratio = border_ratio
-    @staticmethod
-    def recenter(image, border_ratio: float = 0.2):
-        """ recenter an image to leave some empty space at the image border.
-        Args:
-            image (ndarray): input image, float/uint8 [H, W, 3/4]
-            mask (ndarray): alpha mask, bool [H, W]
-            border_ratio (float, optional): border ratio, image will be resized to (1 - border_ratio). Defaults to 0.2.
-        Returns:
-            ndarray: output image, float/uint8 [H, W, 3/4]
-        """
-        if image.shape[-1] == 4:
-            mask = image[..., 3]
-        else:
-            mask = np.ones_like(image[..., 0:1]) * 255
-            image = np.concatenate([image, mask], axis=-1)
-            mask = mask[..., 0]
-        H, W, C = image.shape
-        size = max(H, W)
-        result = np.zeros((size, size, C), dtype=np.uint8)
-        coords = np.nonzero(mask)
-        x_min, x_max = coords[0].min(), coords[0].max()
-        y_min, y_max = coords[1].min(), coords[1].max()
-        h = x_max - x_min
-        w = y_max - y_min
-        if h == 0 or w == 0:
-            raise ValueError('input image is empty')
-        desired_size = int(size * (1 - border_ratio))
-        scale = desired_size / max(h, w)
-        h2 = int(h * scale)
-        w2 = int(w * scale)
-        x2_min = (size - h2) // 2
-        x2_max = x2_min + h2
-        y2_min = (size - w2) // 2
-        y2_max = y2_min + w2
-        result[x2_min:x2_max, y2_min:y2_max] = cv2.resize(image[x_min:x_max, y_min:y_max], (w2, h2),
-                                                          interpolation=cv2.INTER_AREA)
-        bg = np.ones((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255
-        mask = result[..., 3:].astype(np.float32) / 255
-        result = result[..., :3] * mask + bg * (1 - mask)
-        mask = mask * 255
-        result = result.clip(0, 255).astype(np.uint8)
-        mask = mask.clip(0, 255).astype(np.uint8)
-        return result, mask
-    def load_image(self, image, border_ratio=0.15, to_tensor=True):
-        if isinstance(image, str):
-            image = cv2.imread(image, cv2.IMREAD_UNCHANGED)
-            image, mask = self.recenter(image, border_ratio=border_ratio)
-            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        elif isinstance(image, Image.Image):
-            image = image.convert("RGBA")
-            image = np.asarray(image)
-            image, mask = self.recenter(image, border_ratio=border_ratio)
-        image = cv2.resize(image, (self.size, self.size), interpolation=cv2.INTER_CUBIC)
-        mask = cv2.resize(mask, (self.size, self.size), interpolation=cv2.INTER_NEAREST)
-        mask = mask[..., np.newaxis]
-        if to_tensor:
-            image = array_to_tensor(image)
-            mask = array_to_tensor(mask)
-        return image, mask
-    def __call__(self, image, border_ratio=0.15, to_tensor=True, **kwargs):
-        if self.border_ratio is not None:
-            border_ratio = self.border_ratio
-        image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor)
-        outputs = {
-            'image': image,
-            'mask': mask
-        }
-        return outputs
-class MVImageProcessorV2(ImageProcessorV2):
-    """
-    view order: front, front clockwise 90, back, front clockwise 270
-    """
-    return_view_idx = True
-    def __init__(self, size=512, border_ratio=None):
-        super().__init__(size, border_ratio)
-        self.view2idx = {
-            'front': 0,
-            'left': 1,
-            'back': 2,
-            'right': 3
-        }
-    def __call__(self, image_dict, border_ratio=0.15, to_tensor=True, **kwargs):
-        if self.border_ratio is not None:
-            border_ratio = self.border_ratio
-        images = []
-        masks = []
-        view_idxs = []
-        for idx, (view_tag, image) in enumerate(image_dict.items()):
-            view_idxs.append(self.view2idx[view_tag])
-            image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor)
-            images.append(image)
-            masks.append(mask)
-        zipped_lists = zip(view_idxs, images, masks)
-        sorted_zipped_lists = sorted(zipped_lists)
-        view_idxs, images, masks = zip(*sorted_zipped_lists)
-        image = torch.cat(images, 0).unsqueeze(0)
-        mask = torch.cat(masks, 0).unsqueeze(0)
-        outputs = {
-            'image': image,
-            'mask': mask,
-            'view_idxs': view_idxs
-        }
-        return outputs
-IMAGE_PROCESSORS = {
-    "v2": ImageProcessorV2,
-    'mv_v2': MVImageProcessorV2,
-}
-DEFAULT_IMAGEPROCESSOR = 'v2'

ultrashape/rembg.py DELETED Viewed

@@ -1,32 +0,0 @@
-# ==============================================================================
-# Original work Copyright (c) 2025 Tencent.
-# Modified work Copyright (c) 2025 UltraShape Team.
-#
-# Modified by UltraShape on 2025.12.25
-# ==============================================================================
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-from PIL import Image
-from rembg import remove, new_session
-class BackgroundRemover():
-    def __init__(self):
-        self.session = new_session()
-    def __call__(self, image: Image.Image):
-        output = remove(image, session=self.session, bgcolor=[255, 255, 255, 0])
-        return output

ultrashape/schedulers.py DELETED Viewed

@@ -1,480 +0,0 @@
-# Copyright 2024 Stability AI, Katherine Crowson and The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import math
-from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
-import numpy as np
-import torch
-from diffusers.configuration_utils import ConfigMixin, register_to_config
-from diffusers.schedulers.scheduling_utils import SchedulerMixin
-from diffusers.utils import BaseOutput, logging
-logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
-@dataclass
-class FlowMatchEulerDiscreteSchedulerOutput(BaseOutput):
-    """
-    Output class for the scheduler's `step` function output.
-    Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
-            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
-            denoising loop.
-    """
-    prev_sample: torch.FloatTensor
-class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
-    """
-    NOTE: this is very similar to diffusers.FlowMatchEulerDiscreteScheduler. Except our timesteps are reversed
-    Euler scheduler.
-    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
-    methods the library implements for all schedulers such as loading and saving.
-    Args:
-        num_train_timesteps (`int`, defaults to 1000):
-            The number of diffusion steps to train the model.
-        timestep_spacing (`str`, defaults to `"linspace"`):
-            The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
-            Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
-        shift (`float`, defaults to 1.0):
-            The shift value for the timestep schedule.
-    """
-    _compatibles = []
-    order = 1
-    @register_to_config
-    def __init__(
-        self,
-        num_train_timesteps: int = 1000,
-        shift: float = 1.0,
-        use_dynamic_shifting=False,
-    ):
-        timesteps = np.linspace(1, num_train_timesteps, num_train_timesteps, dtype=np.float32).copy()
-        timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32)
-        sigmas = timesteps / num_train_timesteps
-        if not use_dynamic_shifting:
-            # when use_dynamic_shifting is True, we apply the timestep shifting on the fly based on the image resolution
-            sigmas = shift * sigmas / (1 + (shift - 1) * sigmas)
-        self.timesteps = sigmas * num_train_timesteps
-        self._step_index = None
-        self._begin_index = None
-        self.sigmas = sigmas.to("cpu")  # to avoid too much CPU/GPU communication
-        self.sigma_min = self.sigmas[-1].item()
-        self.sigma_max = self.sigmas[0].item()
-    @property
-    def step_index(self):
-        """
-        The index counter for current timestep. It will increase 1 after each scheduler step.
-        """
-        return self._step_index
-    @property
-    def begin_index(self):
-        """
-        The index for the first timestep. It should be set from pipeline with `set_begin_index` method.
-        """
-        return self._begin_index
-    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index
-    def set_begin_index(self, begin_index: int = 0):
-        """
-        Sets the begin index for the scheduler. This function should be run from pipeline before the inference.
-        Args:
-            begin_index (`int`):
-                The begin index for the scheduler.
-        """
-        self._begin_index = begin_index
-    def scale_noise(
-        self,
-        sample: torch.FloatTensor,
-        timestep: Union[float, torch.FloatTensor],
-        noise: Optional[torch.FloatTensor] = None,
-    ) -> torch.FloatTensor:
-        """
-        Forward process in flow-matching
-        Args:
-            sample (`torch.FloatTensor`):
-                The input sample.
-            timestep (`int`, *optional*):
-                The current timestep in the diffusion chain.
-        Returns:
-            `torch.FloatTensor`:
-                A scaled input sample.
-        """
-        # Make sure sigmas and timesteps have the same device and dtype as original_samples
-        sigmas = self.sigmas.to(device=sample.device, dtype=sample.dtype)
-        if sample.device.type == "mps" and torch.is_floating_point(timestep):
-            # mps does not support float64
-            schedule_timesteps = self.timesteps.to(sample.device, dtype=torch.float32)
-            timestep = timestep.to(sample.device, dtype=torch.float32)
-        else:
-            schedule_timesteps = self.timesteps.to(sample.device)
-            timestep = timestep.to(sample.device)
-        # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
-        if self.begin_index is None:
-            step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timestep]
-        elif self.step_index is not None:
-            # add_noise is called after first denoising step (for inpainting)
-            step_indices = [self.step_index] * timestep.shape[0]
-        else:
-            # add noise is called before first denoising step to create initial latent(img2img)
-            step_indices = [self.begin_index] * timestep.shape[0]
-        sigma = sigmas[step_indices].flatten()
-        while len(sigma.shape) < len(sample.shape):
-            sigma = sigma.unsqueeze(-1)
-        sample = sigma * noise + (1.0 - sigma) * sample
-        return sample
-    def _sigma_to_t(self, sigma):
-        return sigma * self.config.num_train_timesteps
-    def time_shift(self, mu: float, sigma: float, t: torch.Tensor):
-        return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
-    def set_timesteps(
-        self,
-        num_inference_steps: int = None,
-        device: Union[str, torch.device] = None,
-        sigmas: Optional[List[float]] = None,
-        mu: Optional[float] = None,
-    ):
-        """
-        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
-        Args:
-            num_inference_steps (`int`):
-                The number of diffusion steps used when generating samples with a pre-trained model.
-            device (`str` or `torch.device`, *optional*):
-                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
-        """
-        if self.config.use_dynamic_shifting and mu is None:
-            raise ValueError(" you have a pass a value for `mu` when `use_dynamic_shifting` is set to be `True`")
-        if sigmas is None:
-            self.num_inference_steps = num_inference_steps
-            timesteps = np.linspace(
-                self._sigma_to_t(self.sigma_max), self._sigma_to_t(self.sigma_min), num_inference_steps
-            )
-            sigmas = timesteps / self.config.num_train_timesteps
-        if self.config.use_dynamic_shifting:
-            sigmas = self.time_shift(mu, 1.0, sigmas)
-        else:
-            sigmas = self.config.shift * sigmas / (1 + (self.config.shift - 1) * sigmas)
-        sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device)
-        timesteps = sigmas * self.config.num_train_timesteps
-        self.timesteps = timesteps.to(device=device)
-        self.sigmas = torch.cat([sigmas, torch.ones(1, device=sigmas.device)])
-        self._step_index = None
-        self._begin_index = None
-    def index_for_timestep(self, timestep, schedule_timesteps=None):
-        if schedule_timesteps is None:
-            schedule_timesteps = self.timesteps
-        indices = (schedule_timesteps == timestep).nonzero()
-        # The sigma index that is taken for the **very** first `step`
-        # is always the second index (or the last index if there is only 1)
-        # This way we can ensure we don't accidentally skip a sigma in
-        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
-        pos = 1 if len(indices) > 1 else 0
-        return indices[pos].item()
-    def _init_step_index(self, timestep):
-        if self.begin_index is None:
-            if isinstance(timestep, torch.Tensor):
-                timestep = timestep.to(self.timesteps.device)
-            self._step_index = self.index_for_timestep(timestep)
-        else:
-            self._step_index = self._begin_index
-    def step(
-        self,
-        model_output: torch.FloatTensor,
-        timestep: Union[float, torch.FloatTensor],
-        sample: torch.FloatTensor,
-        s_churn: float = 0.0,
-        s_tmin: float = 0.0,
-        s_tmax: float = float("inf"),
-        s_noise: float = 1.0,
-        generator: Optional[torch.Generator] = None,
-        return_dict: bool = True,
-    ) -> Union[FlowMatchEulerDiscreteSchedulerOutput, Tuple]:
-        """
-        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
-        process from the learned model outputs (most often the predicted noise).
-        Args:
-            model_output (`torch.FloatTensor`):
-                The direct output from learned diffusion model.
-            timestep (`float`):
-                The current discrete timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
-                A current instance of a sample created by the diffusion process.
-            s_churn (`float`):
-            s_tmin  (`float`):
-            s_tmax  (`float`):
-            s_noise (`float`, defaults to 1.0):
-                Scaling factor for noise added to the sample.
-            generator (`torch.Generator`, *optional*):
-                A random number generator.
-            return_dict (`bool`):
-                Whether or not to return a [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or
-                tuple.
-        Returns:
-            [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or `tuple`:
-                If return_dict is `True`, [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] is
-                returned, otherwise a tuple is returned where the first element is the sample tensor.
-        """
-        if (
-            isinstance(timestep, int)
-            or isinstance(timestep, torch.IntTensor)
-            or isinstance(timestep, torch.LongTensor)
-        ):
-            raise ValueError(
-                (
-                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
-                    " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass"
-                    " one of the `scheduler.timesteps` as a timestep."
-                ),
-            )
-        if self.step_index is None:
-            self._init_step_index(timestep)
-        # Upcast to avoid precision issues when computing prev_sample
-        sample = sample.to(torch.float32).to(model_output.device)
-        sigma = self.sigmas[self.step_index].to(model_output.device)
-        sigma_next = self.sigmas[self.step_index + 1].to(model_output.device)
-        prev_sample = sample + (sigma_next - sigma) * model_output
-        # Cast sample back to model compatible dtype
-        prev_sample = prev_sample.to(model_output.dtype)
-        # upon completion increase step index by one
-        self._step_index += 1
-        if not return_dict:
-            return (prev_sample,)
-        return FlowMatchEulerDiscreteSchedulerOutput(prev_sample=prev_sample)
-    def __len__(self):
-        return self.config.num_train_timesteps
-@dataclass
-class ConsistencyFlowMatchEulerDiscreteSchedulerOutput(BaseOutput):
-    prev_sample: torch.FloatTensor
-    pred_original_sample: torch.FloatTensor
-class ConsistencyFlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
-    _compatibles = []
-    order = 1
-    @register_to_config
-    def __init__(
-        self,
-        num_train_timesteps: int = 1000,
-        pcm_timesteps: int = 50,
-    ):
-        sigmas = np.linspace(0, 1, num_train_timesteps)
-        step_ratio = num_train_timesteps // pcm_timesteps
-        euler_timesteps = (np.arange(1, pcm_timesteps) * step_ratio).round().astype(np.int64) - 1
-        euler_timesteps = np.asarray([0] + euler_timesteps.tolist())
-        self.euler_timesteps = euler_timesteps
-        self.sigmas = sigmas[self.euler_timesteps]
-        self.sigmas = torch.from_numpy((self.sigmas.copy())).to(dtype=torch.float32)
-        self.timesteps = self.sigmas * num_train_timesteps
-        self._step_index = None
-        self._begin_index = None
-        self.sigmas = self.sigmas.to("cpu")  # to avoid too much CPU/GPU communication
-    @property
-    def step_index(self):
-        """
-        The index counter for current timestep. It will increase 1 after each scheduler step.
-        """
-        return self._step_index
-    @property
-    def begin_index(self):
-        """
-        The index for the first timestep. It should be set from pipeline with `set_begin_index` method.
-        """
-        return self._begin_index
-    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index
-    def set_begin_index(self, begin_index: int = 0):
-        """
-        Sets the begin index for the scheduler. This function should be run from pipeline before the inference.
-        Args:
-            begin_index (`int`):
-                The begin index for the scheduler.
-        """
-        self._begin_index = begin_index
-    def _sigma_to_t(self, sigma):
-        return sigma * self.config.num_train_timesteps
-    def set_timesteps(
-        self,
-        num_inference_steps: int = None,
-        device: Union[str, torch.device] = None,
-        sigmas: Optional[List[float]] = None,
-    ):
-        """
-        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
-        Args:
-            num_inference_steps (`int`):
-                The number of diffusion steps used when generating samples with a pre-trained model.
-            device (`str` or `torch.device`, *optional*):
-                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
-        """
-        self.num_inference_steps = num_inference_steps if num_inference_steps is not None else len(sigmas)
-        inference_indices = np.linspace(
-            0, self.config.pcm_timesteps, num=self.num_inference_steps, endpoint=False
-        )
-        inference_indices = np.floor(inference_indices).astype(np.int64)
-        inference_indices = torch.from_numpy(inference_indices).long()
-        self.sigmas_ = self.sigmas[inference_indices]
-        timesteps = self.sigmas_ * self.config.num_train_timesteps
-        self.timesteps = timesteps.to(device=device)
-        self.sigmas_ = torch.cat(
-            [self.sigmas_, torch.ones(1, device=self.sigmas_.device)]
-        )
-        self._step_index = None
-        self._begin_index = None
-    def index_for_timestep(self, timestep, schedule_timesteps=None):
-        if schedule_timesteps is None:
-            schedule_timesteps = self.timesteps
-        indices = (schedule_timesteps == timestep).nonzero()
-        # The sigma index that is taken for the **very** first `step`
-        # is always the second index (or the last index if there is only 1)
-        # This way we can ensure we don't accidentally skip a sigma in
-        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
-        pos = 1 if len(indices) > 1 else 0
-        return indices[pos].item()
-    def _init_step_index(self, timestep):
-        if self.begin_index is None:
-            if isinstance(timestep, torch.Tensor):
-                timestep = timestep.to(self.timesteps.device)
-            self._step_index = self.index_for_timestep(timestep)
-        else:
-            self._step_index = self._begin_index
-    def step(
-        self,
-        model_output: torch.FloatTensor,
-        timestep: Union[float, torch.FloatTensor],
-        sample: torch.FloatTensor,
-        generator: Optional[torch.Generator] = None,
-        return_dict: bool = True,
-    ) -> Union[ConsistencyFlowMatchEulerDiscreteSchedulerOutput, Tuple]:
-        if (
-            isinstance(timestep, int)
-            or isinstance(timestep, torch.IntTensor)
-            or isinstance(timestep, torch.LongTensor)
-        ):
-            raise ValueError(
-                (
-                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
-                    " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass"
-                    " one of the `scheduler.timesteps` as a timestep."
-                ),
-            )
-        if self.step_index is None:
-            self._init_step_index(timestep)
-        sample = sample.to(torch.float32).to(model_output.device)
-        sigma = self.sigmas_[self.step_index].to(model_output.device)
-        sigma_next = self.sigmas_[self.step_index + 1].to(model_output.device)
-        prev_sample = sample + (sigma_next - sigma) * model_output
-        prev_sample = prev_sample.to(model_output.dtype)
-        pred_original_sample = sample + (1.0 - sigma) * model_output
-        pred_original_sample = pred_original_sample.to(model_output.dtype)
-        self._step_index += 1
-        if not return_dict:
-            return (prev_sample,)
-        return ConsistencyFlowMatchEulerDiscreteSchedulerOutput(prev_sample=prev_sample,
-                                                                pred_original_sample=pred_original_sample)
-    def __len__(self):
-        return self.config.num_train_timesteps

ultrashape/surface_loaders.py DELETED Viewed

@@ -1,233 +0,0 @@
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import numpy as np
-import torch
-import trimesh
-def normalize_mesh(mesh, scale=0.9999):
-    """
-    Normalize the mesh to fit inside a centered cube with a specified scale.
-    The mesh is translated so that its bounding box center is at the origin,
-    then uniformly scaled so that the longest side of the bounding box fits within [-scale, scale].
-    Args:
-        mesh (trimesh.Trimesh): Input mesh to normalize.
-        scale (float, optional): Scaling factor to slightly shrink the mesh inside the unit cube. Default is 0.9999.
-    Returns:
-        trimesh.Trimesh: The normalized mesh with applied translation and scaling.
-    """
-    bbox = mesh.bounds
-    center = (bbox[1] + bbox[0]) / 2
-    scale_ = (bbox[1] - bbox[0]).max()
-    mesh.apply_translation(-center)
-    mesh.apply_scale(1 / scale_ * 2 * scale)
-    return mesh
-def sample_pointcloud(mesh, num=200000):
-    """
-    Sample points uniformly from the surface of the mesh along with their corresponding face normals.
-    Args:
-        mesh (trimesh.Trimesh): Input mesh to sample from.
-        num (int, optional): Number of points to sample. Default is 200000.
-    Returns:
-        Tuple[torch.Tensor, torch.Tensor]:
-            - points: Sampled points as a float tensor of shape (num, 3).
-            - normals: Corresponding normals as a float tensor of shape (num, 3).
-    """
-    points, face_idx = mesh.sample(num, return_index=True)
-    normals = mesh.face_normals[face_idx]
-    points = torch.from_numpy(points.astype(np.float32))
-    normals = torch.from_numpy(normals.astype(np.float32))
-    return points, normals
-def load_surface(mesh, num_points=8192):
-    """
-    Normalize the mesh, sample points and normals from its surface, and randomly select a subset.
-    Args:
-        mesh (trimesh.Trimesh): Input mesh to process.
-        num_points (int, optional): Number of points to randomly select
-                from the sampled surface points. Default is 8192.
-    Returns:
-        Tuple[torch.Tensor, trimesh.Trimesh]:
-            - surface: Tensor of shape (1, num_points, 6), concatenating points and normals.
-            - mesh: The normalized mesh.
-    """
-    mesh = normalize_mesh(mesh, scale=0.98)
-    surface, normal = sample_pointcloud(mesh)
-    rng = np.random.default_rng(seed=0)
-    ind = rng.choice(surface.shape[0], num_points, replace=False)
-    surface = torch.FloatTensor(surface[ind])
-    normal = torch.FloatTensor(normal[ind])
-    surface = torch.cat([surface, normal], dim=-1).unsqueeze(0)
-    return surface, mesh
-def sharp_sample_pointcloud(mesh, num=16384):
-    """
-    Sample points and normals preferentially from sharp edges of the mesh.
-    Sharp edges are detected based on the angle between vertex normals and face normals.
-    Points are sampled along these edges proportionally to edge length.
-    Args:
-        mesh (trimesh.Trimesh): Input mesh to sample from.
-        num (int, optional): Number of points to sample from sharp edges. Default is 16384.
-    Returns:
-        Tuple[np.ndarray, np.ndarray]:
-            - samples: Sampled points along sharp edges, shape (num, 3).
-            - normals: Corresponding interpolated normals, shape (num, 3).
-    """
-    V = mesh.vertices
-    N = mesh.face_normals
-    VN = mesh.vertex_normals
-    F = mesh.faces
-    VN2 = np.ones(V.shape[0])
-    for i in range(3):
-        dot = np.stack((VN2[F[:, i]], np.sum(VN[F[:, i]] * N, axis=-1)), axis=-1)
-        VN2[F[:, i]] = np.min(dot, axis=-1)
-    sharp_mask = VN2 < 0.985
-    # collect edge
-    edge_a = np.concatenate((F[:, 0], F[:, 1], F[:, 2]))
-    edge_b = np.concatenate((F[:, 1], F[:, 2], F[:, 0]))
-    sharp_edge = ((sharp_mask[edge_a] * sharp_mask[edge_b]))
-    edge_a = edge_a[sharp_edge > 0]
-    edge_b = edge_b[sharp_edge > 0]
-    sharp_verts_a = V[edge_a]
-    sharp_verts_b = V[edge_b]
-    sharp_verts_an = VN[edge_a]
-    sharp_verts_bn = VN[edge_b]
-    weights = np.linalg.norm(sharp_verts_b - sharp_verts_a, axis=-1)
-    weights /= np.sum(weights)
-    random_number = np.random.rand(num)
-    w = np.random.rand(num, 1)
-    index = np.searchsorted(weights.cumsum(), random_number)
-    samples = w * sharp_verts_a[index] + (1 - w) * sharp_verts_b[index]
-    normals = w * sharp_verts_an[index] + (1 - w) * sharp_verts_bn[index]
-    return samples, normals
-def load_surface_sharpegde(mesh, num_points=4096, num_sharp_points=4096, sharpedge_flag=True, normalize_scale=0.9999):
-    try:
-        mesh_full = trimesh.util.concatenate(mesh.dump())
-    except Exception as err:
-        mesh_full = trimesh.util.concatenate(mesh)
-    mesh_full = normalize_mesh(mesh_full, scale=normalize_scale)
-    origin_num = mesh_full.faces.shape[0]
-    original_vertices = mesh_full.vertices
-    original_faces = mesh_full.faces
-    mesh = trimesh.Trimesh(vertices=original_vertices, faces=original_faces[:origin_num])
-    mesh_fill = trimesh.Trimesh(vertices=original_vertices, faces=original_faces[origin_num:])
-    area = mesh.area
-    area_fill = mesh_fill.area
-    sample_num = 819200 // 2 # 499712 // 2
-    num_fill = int(sample_num * (area_fill / (area + area_fill)))
-    num = sample_num - num_fill
-    random_surface, random_normal = sample_pointcloud(mesh, num=num)
-    if num_fill == 0:
-        random_surface_fill, random_normal_fill = np.zeros((0, 3)), np.zeros((0, 3))
-    else:
-        random_surface_fill, random_normal_fill = sample_pointcloud(mesh_fill, num=num_fill)
-    random_sharp_surface, sharp_normal = sharp_sample_pointcloud(mesh, num=sample_num)
-    # save_surface
-    surface = np.concatenate((random_surface, random_normal), axis=1).astype(np.float16)
-    surface_fill = np.concatenate((random_surface_fill, random_normal_fill), axis=1).astype(np.float16)
-    sharp_surface = np.concatenate((random_sharp_surface, sharp_normal), axis=1).astype(np.float16)
-    surface = np.concatenate((surface, surface_fill), axis=0)
-    if sharpedge_flag:
-        sharpedge_label = np.zeros((surface.shape[0], 1))
-        surface = np.concatenate((surface, sharpedge_label), axis=1)
-        sharpedge_label = np.ones((sharp_surface.shape[0], 1))
-        sharp_surface = np.concatenate((sharp_surface, sharpedge_label), axis=1)
-    rng = np.random.default_rng()
-    ind = rng.choice(surface.shape[0], num_points, replace=False)
-    surface = torch.FloatTensor(surface[ind])
-    ind = rng.choice(sharp_surface.shape[0], num_sharp_points, replace=False)
-    sharp_surface = torch.FloatTensor(sharp_surface[ind])
-    return torch.cat([surface, sharp_surface], dim=0).unsqueeze(0), mesh_full
-class SurfaceLoader:
-    def __init__(self, num_points=8192):
-        self.num_points = num_points
-    def __call__(self, mesh_or_mesh_path, num_points=None):
-        if num_points is None:
-            num_points = self.num_points
-        mesh = mesh_or_mesh_path
-        if isinstance(mesh, str):
-            mesh = trimesh.load(mesh, force="mesh", merge_primitives=True)
-        if isinstance(mesh, trimesh.scene.Scene):
-            for idx, obj in enumerate(mesh.geometry.values()):
-                if idx == 0:
-                    temp_mesh = obj
-                else:
-                    temp_mesh = temp_mesh + obj
-            mesh = temp_mesh
-        surface, mesh = load_surface(mesh, num_points=num_points)
-        return surface
-class SharpEdgeSurfaceLoader:
-    def __init__(self, num_uniform_points=8192, num_sharp_points=8192, **kwargs):
-        self.num_uniform_points = num_uniform_points
-        self.num_sharp_points = num_sharp_points
-        self.num_points = num_uniform_points + num_sharp_points
-    def __call__(self, mesh_or_mesh_path, num_uniform_points=None,
-        num_sharp_points=None, normalize_scale=0.9999):
-        if num_uniform_points is None:
-            num_uniform_points = self.num_uniform_points
-        if num_sharp_points is None:
-            num_sharp_points = self.num_sharp_points
-        mesh = mesh_or_mesh_path
-        if isinstance(mesh, str):
-            mesh = trimesh.load(mesh, force="mesh", merge_primitives=True)
-        if isinstance(mesh, trimesh.scene.Scene):
-            for idx, obj in enumerate(mesh.geometry.values()):
-                if idx == 0:
-                    temp_mesh = obj
-                else:
-                    temp_mesh = temp_mesh + obj
-            mesh = temp_mesh
-        surface, mesh = load_surface_sharpegde(mesh, num_points=num_uniform_points,
-            num_sharp_points=num_sharp_points, normalize_scale=normalize_scale)
-        return surface

ultrashape/utils/__init__.py DELETED Viewed

@@ -1,6 +0,0 @@
-# -*- coding: utf-8 -*-
-from .misc import get_config_from_file
-from .misc import instantiate_from_config
-from .utils import get_logger, logger, synchronize_timer, smart_load_model
-from .voxelize import voxelize_from_point

ultrashape/utils/ema.py DELETED Viewed

@@ -1,76 +0,0 @@
-import torch
-from torch import nn
-class LitEma(nn.Module):
-    def __init__(self, model, decay=0.9999, use_num_updates=True):
-        super().__init__()
-        if decay < 0.0 or decay > 1.0:
-            raise ValueError('Decay must be between 0 and 1')
-        self.m_name2s_name = {}
-        self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32))
-        self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_updates
-        else torch.tensor(-1, dtype=torch.int))
-        for name, p in model.named_parameters():
-            if p.requires_grad:
-                # remove as '.'-character is not allowed in buffers
-                s_name = name.replace('.', '_____')
-                self.m_name2s_name.update({name: s_name})
-                self.register_buffer(s_name, p.clone().detach().data)
-        self.collected_params = []
-    def forward(self, model):
-        decay = self.decay
-        if self.num_updates >= 0:
-            self.num_updates += 1
-            decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates))
-        one_minus_decay = 1.0 - decay
-        with torch.no_grad():
-            m_param = dict(model.named_parameters())
-            shadow_params = dict(self.named_buffers())
-            for key in m_param:
-                if m_param[key].requires_grad:
-                    sname = self.m_name2s_name[key]
-                    shadow_params[sname] = shadow_params[sname].type_as(m_param[key])
-                    shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key]))
-                else:
-                    assert not key in self.m_name2s_name
-    def copy_to(self, model):
-        m_param = dict(model.named_parameters())
-        shadow_params = dict(self.named_buffers())
-        for key in m_param:
-            if m_param[key].requires_grad:
-                m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data)
-            else:
-                assert not key in self.m_name2s_name
-    def store(self, model):
-        """
-        Save the current parameters for restoring later.
-        Args:
-          parameters: Iterable of `torch.nn.Parameter`; the parameters to be
-            temporarily stored.
-        """
-        self.collected_params = [param.clone() for param in model.parameters()]
-    def restore(self, model):
-        """
-        Restore the parameters stored with the `store` method.
-        Useful to validate the model with EMA parameters without affecting the
-        original optimization process. Store the parameters before the
-        `copy_to` method. After validation (or model saving), use this to
-        restore the former parameters.
-        Args:
-          parameters: Iterable of `torch.nn.Parameter`; the parameters to be
-            updated with the stored parameters.
-        """
-        for c_param, param in zip(self.collected_params, model.parameters()):
-            param.data.copy_(c_param.data)

ultrashape/utils/misc.py DELETED Viewed

@@ -1,200 +0,0 @@
-# -*- coding: utf-8 -*-
-import importlib
-from omegaconf import OmegaConf, DictConfig, ListConfig
-import torch
-import torch.distributed as dist
-from typing import Union
-from .utils import logger
-import os
-def get_config_from_file(config_file: str) -> Union[DictConfig, ListConfig]:
-    config_file = OmegaConf.load(config_file)
-    if 'base_config' in config_file.keys():
-        if config_file['base_config'] == "default_base":
-            base_config = OmegaConf.create()
-            # base_config = get_default_config()
-        elif config_file['base_config'].endswith(".yaml"):
-            base_config = get_config_from_file(config_file['base_config'])
-        else:
-            raise ValueError(f"{config_file} must be `.yaml` file or it contains `base_config` key.")
-        config_file = {key: value for key, value in config_file if key != "base_config"}
-        return OmegaConf.merge(base_config, config_file)
-    return config_file
-def get_obj_from_str(string, reload=False):
-    module, cls = string.rsplit(".", 1)
-    if reload:
-        module_imp = importlib.import_module(module)
-        importlib.reload(module_imp)
-    return getattr(importlib.import_module(module, package=None), cls)
-def get_obj_from_config(config):
-    if "target" not in config:
-        raise KeyError("Expected key `target` to instantiate.")
-    return get_obj_from_str(config["target"])
-def instantiate_from_config(config, **kwargs):
-    if "target" not in config:
-        raise KeyError("Expected key `target` to instantiate.")
-    cls = get_obj_from_str(config["target"])
-    if config.get("from_pretrained", None):
-        return cls.from_pretrained(
-                    config["from_pretrained"],
-                    use_safetensors=config.get('use_safetensors', False),
-                    variant=config.get('variant', 'fp16'))
-    params = config.get("params", dict())
-    # params.update(kwargs)
-    # instance = cls(**params)
-    kwargs.update(params)
-    instance = cls(**kwargs)
-    return instance
-def instantiate_vae_from_config(config, **kwargs):
-    if "target" not in config:
-        raise KeyError("Expected key `target` to instantiate.")
-    cls = get_obj_from_str(config["target"])
-    if config.get("from_pretrained", None):
-        return cls.from_pretrained(
-                    config["from_pretrained"],
-                    params=config.get("params", dict()),
-                    use_safetensors=config.get('use_safetensors', False),
-                    variant=config.get('variant', 'fp16'))
-    params = config.get("params", dict())
-    kwargs.update(params)
-    instance = cls(**kwargs)
-    return instance
-def instantiate_vae_from_config_local(config, **kwargs):
-    if "target" not in config:
-        raise KeyError("Expected key `target` to instantiate.")
-    cls = get_obj_from_str(config["target"])
-    if not config.get("from_pretrained", None):
-        raise FileNotFoundError(f"Need from_pretrained!")
-    ckpt_path = config["from_pretrained"]
-    logger.info(f"Loading model from {ckpt_path}")
-    if not os.path.exists(ckpt_path):
-        raise FileNotFoundError(f"Model file {ckpt_path} not found")
-    ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True)
-    if 'state_dict' not in ckpt:
-        # deepspeed ckpt
-        state_dict = {}
-        for k in ckpt.keys():
-            new_k = k.replace('vae_model.', '')
-            state_dict[new_k] = ckpt[k]
-    else:
-        state_dict = ckpt["state_dict"]
-    params = config.get("params", dict())
-    kwargs.update(params)
-    instance = cls(**kwargs)
-    missing, unexpected = instance.load_state_dict(state_dict)
-    print(f"VAE Missing Keys: {missing}")
-    print(f"VAE Unexpected Keys: {unexpected}")
-    return instance
-def disabled_train(self, mode=True):
-    """Overwrite model.train with this function to make sure train/eval mode
-    does not change anymore."""
-    return self
-def instantiate_non_trainable_model(config):
-    model = instantiate_from_config(config)
-    model = model.eval()
-    model.train = disabled_train
-    for param in model.parameters():
-        param.requires_grad = False
-    return model
-def instantiate_vae_model(config, requires_grad=False):
-    model = instantiate_vae_from_config(config)
-    model = model.eval()
-    model.train = disabled_train
-    for param in model.parameters():
-        param.requires_grad = requires_grad
-    return model
-def instantiate_vae_model_local(config, requires_grad=False):
-    model = instantiate_vae_from_config_local(config)
-    model = model.eval()
-    model.train = disabled_train
-    for param in model.parameters():
-        param.requires_grad = requires_grad
-    return model
-def is_dist_avail_and_initialized():
-    if not dist.is_available():
-        return False
-    if not dist.is_initialized():
-        return False
-    return True
-def get_rank():
-    if not is_dist_avail_and_initialized():
-        return 0
-    return dist.get_rank()
-def get_world_size():
-    if not is_dist_avail_and_initialized():
-        return 1
-    return dist.get_world_size()
-def all_gather_batch(tensors):
-    """
-    Performs all_gather operation on the provided tensors.
-    """
-    # Queue the gathered tensors
-    world_size = get_world_size()
-    # There is no need for reduction in the single-proc case
-    if world_size == 1:
-        return tensors
-    tensor_list = []
-    output_tensor = []
-    for tensor in tensors:
-        tensor_all = [torch.ones_like(tensor) for _ in range(world_size)]
-        dist.all_gather(
-            tensor_all,
-            tensor,
-            async_op=False  # performance opt
-        )
-        tensor_list.append(tensor_all)
-    for tensor_all in tensor_list:
-        output_tensor.append(torch.cat(tensor_all, dim=0))
-    return output_tensor

ultrashape/utils/trainings/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- # -- coding: utf-8 --

ultrashape/utils/trainings/callback.py DELETED Viewed

@@ -1,213 +0,0 @@
-# ------------------------------------------------------------------------------------
-# Modified from Taming Transformers (https://github.com/CompVis/taming-transformers)
-# Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer. All Rights Reserved.
-# ------------------------------------------------------------------------------------
-import os
-import time
-import wandb
-import numpy as np
-from PIL import Image
-from pathlib import Path
-from omegaconf import OmegaConf, DictConfig
-from typing import Tuple, Generic, Dict, Callable, Optional, Any
-from pprint import pprint
-import torch
-import torchvision
-import pytorch_lightning as pl
-import pytorch_lightning.loggers
-from pytorch_lightning.loggers import WandbLogger
-from pytorch_lightning.loggers.logger import DummyLogger
-from pytorch_lightning.utilities import rank_zero_only, rank_zero_info
-from pytorch_lightning.callbacks import Callback
-from functools import wraps
-def node_zero_only(fn: Callable) -> Callable:
-    @wraps(fn)
-    def wrapped_fn(*args, **kwargs) -> Optional[Any]:
-        if node_zero_only.node == 0:
-            return fn(*args, **kwargs)
-        return None
-    return wrapped_fn
-node_zero_only.node = getattr(node_zero_only, 'node', int(os.environ.get('NODE_RANK', 0)))
-def node_zero_experiment(fn: Callable) -> Callable:
-    """Returns the real experiment on rank 0 and otherwise the DummyExperiment."""
-    @wraps(fn)
-    def experiment(self):
-        @node_zero_only
-        def get_experiment():
-            return fn(self)
-        return get_experiment() or DummyLogger.experiment
-    return experiment
-# customize wandb for node 0 only
-class MyWandbLogger(WandbLogger):
-    @WandbLogger.experiment.getter
-    @node_zero_experiment
-    def experiment(self):
-        return super().experiment
-class SetupCallback(Callback):
-    def __init__(self, config: DictConfig, exp_config: DictConfig,
-                 basedir: Path, logdir: str = "log", ckptdir: str = "ckpt") -> None:
-        super().__init__()
-        self.logdir = basedir / logdir
-        self.ckptdir = basedir / ckptdir
-        self.config = config
-        self.exp_config = exp_config
-    # def on_pretrain_routine_start(self, trainer: pl.trainer.Trainer, pl_module: pl.LightningModule) -> None:
-    #     if trainer.global_rank == 0:
-    #         # Create logdirs and save configs
-    #         os.makedirs(self.logdir, exist_ok=True)
-    #         os.makedirs(self.ckptdir, exist_ok=True)
-    #
-    #         print("Experiment config")
-    #         print(self.exp_config.pretty())
-    #
-    #         print("Model config")
-    #         print(self.config.pretty())
-    def on_fit_start(self, trainer: pl.trainer.Trainer, pl_module: pl.LightningModule) -> None:
-        if trainer.global_rank == 0:
-            # Create logdirs and save configs
-            os.makedirs(self.logdir, exist_ok=True)
-            os.makedirs(self.ckptdir, exist_ok=True)
-            # print("Experiment config")
-            # pprint(self.exp_config)
-            #
-            # print("Model config")
-            # pprint(self.config)
-class ImageLogger(Callback):
-    def __init__(self, batch_frequency: int, max_images: int, clamp: bool = True,
-                 increase_log_steps: bool = True) -> None:
-        super().__init__()
-        self.batch_freq = batch_frequency
-        self.max_images = max_images
-        self.logger_log_images = {
-            pl.loggers.WandbLogger: self._wandb,
-            pl.loggers.TestTubeLogger: self._testtube,
-        }
-        self.log_steps = [2 ** n for n in range(int(np.log2(self.batch_freq)) + 1)]
-        if not increase_log_steps:
-            self.log_steps = [self.batch_freq]
-        self.clamp = clamp
-    @rank_zero_only
-    def _wandb(self, pl_module, images, batch_idx, split):
-        # raise ValueError("No way wandb")
-        grids = dict()
-        for k in images:
-            grid = torchvision.utils.make_grid(images[k])
-            grids[f"{split}/{k}"] = wandb.Image(grid)
-        pl_module.logger.experiment.log(grids)
-    @rank_zero_only
-    def _testtube(self, pl_module, images, batch_idx, split):
-        for k in images:
-            grid = torchvision.utils.make_grid(images[k])
-            grid = (grid + 1.0) / 2.0  # -1,1 -> 0,1; c,h,w
-            tag = f"{split}/{k}"
-            pl_module.logger.experiment.add_image(
-                tag, grid,
-                global_step=pl_module.global_step)
-    @rank_zero_only
-    def log_local(self, save_dir: str, split: str, images: Dict,
-                  global_step: int, current_epoch: int, batch_idx: int) -> None:
-        root = os.path.join(save_dir, "results", split)
-        os.makedirs(root, exist_ok=True)
-        for k in images:
-            grid = torchvision.utils.make_grid(images[k], nrow=4)
-            grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1)
-            grid = grid.numpy()
-            grid = (grid * 255).astype(np.uint8)
-            filename = "{}_gs-{:06}_e-{:06}_b-{:06}.png".format(
-                k,
-                global_step,
-                current_epoch,
-                batch_idx)
-            path = os.path.join(root, filename)
-            os.makedirs(os.path.split(path)[0], exist_ok=True)
-            Image.fromarray(grid).save(path)
-    def log_img(self, pl_module: pl.LightningModule, batch: Tuple[torch.LongTensor, torch.FloatTensor], batch_idx: int,
-                split: str = "train") -> None:
-        if (self.check_frequency(batch_idx) and  # batch_idx % self.batch_freq == 0
-                hasattr(pl_module, "log_images") and
-                callable(pl_module.log_images) and
-                self.max_images > 0):
-            logger = type(pl_module.logger)
-            is_train = pl_module.training
-            if is_train:
-                pl_module.eval()
-            with torch.no_grad():
-                images = pl_module.log_images(batch, split=split, pl_module=pl_module)
-            for k in images:
-                N = min(images[k].shape[0], self.max_images)
-                images[k] = images[k][:N].detach().cpu()
-                if self.clamp:
-                    images[k] = images[k].clamp(0, 1)
-            self.log_local(pl_module.logger.save_dir, split, images,
-                           pl_module.global_step, pl_module.current_epoch, batch_idx)
-            logger_log_images = self.logger_log_images.get(logger, lambda *args, **kwargs: None)
-            logger_log_images(pl_module, images, pl_module.global_step, split)
-            if is_train:
-                pl_module.train()
-    def check_frequency(self, batch_idx: int) -> bool:
-        if (batch_idx % self.batch_freq) == 0 or (batch_idx in self.log_steps):
-            try:
-                self.log_steps.pop(0)
-            except IndexError:
-                pass
-            return True
-        return False
-    def on_train_batch_end(self, trainer: pl.trainer.Trainer, pl_module: pl.LightningModule,
-                           outputs: Generic, batch: Tuple[torch.LongTensor, torch.FloatTensor], batch_idx: int) -> None:
-        self.log_img(pl_module, batch, batch_idx, split="train")
-    def on_validation_batch_end(self, trainer: pl.trainer.Trainer, pl_module: pl.LightningModule,
-                                outputs: Generic, batch: Tuple[torch.LongTensor, torch.FloatTensor],
-                                dataloader_idx: int, batch_idx: int) -> None:
-        self.log_img(pl_module, batch, batch_idx, split="val")
-class CUDACallback(Callback):
-    # see https://github.com/SeanNaren/minGPT/blob/master/mingpt/callback.py
-    def on_train_epoch_start(self, trainer, pl_module):
-        # Reset the memory use counter
-        torch.cuda.reset_peak_memory_stats(trainer.root_gpu)
-        torch.cuda.synchronize(trainer.root_gpu)
-        self.start_time = time.time()
-    def on_train_epoch_end(self, trainer, pl_module, outputs):
-        torch.cuda.synchronize(trainer.root_gpu)
-        max_memory = torch.cuda.max_memory_allocated(trainer.root_gpu) / 2 ** 20
-        epoch_time = time.time() - self.start_time
-        try:
-            max_memory = trainer.training_type_plugin.reduce(max_memory)
-            epoch_time = trainer.training_type_plugin.reduce(epoch_time)
-            rank_zero_info(f"Average Epoch time: {epoch_time:.2f} seconds")
-            rank_zero_info(f"Average Peak memory {max_memory:.2f}MiB")
-        except AttributeError:
-            pass

ultrashape/utils/trainings/lr_scheduler.py DELETED Viewed

@@ -1,53 +0,0 @@
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import numpy as np
-class BaseScheduler(object):
-    def schedule(self, n, **kwargs):
-        raise NotImplementedError
-class LambdaWarmUpCosineFactorScheduler(BaseScheduler):
-    """
-    note: use with a base_lr of 1.0
-    """
-    def __init__(self, warm_up_steps, f_min, f_max, f_start, max_decay_steps, verbosity_interval=0, **ignore_kwargs):
-        self.lr_warm_up_steps = warm_up_steps
-        self.f_start = f_start
-        self.f_min = f_min
-        self.f_max = f_max
-        self.lr_max_decay_steps = max_decay_steps
-        self.last_f = 0.
-        self.verbosity_interval = verbosity_interval
-    def schedule(self, n, **kwargs):
-        if self.verbosity_interval > 0:
-            if n % self.verbosity_interval == 0:
-                print(f"current step: {n}, recent lr-multiplier: {self.f_start}")
-        if n < self.lr_warm_up_steps:
-            f = (self.f_max - self.f_start) / self.lr_warm_up_steps * n + self.f_start
-            self.last_f = f
-            return f
-        else:
-            t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps)
-            t = min(t, 1.0)
-            f = self.f_min + 0.5 * (self.f_max - self.f_min) * (1 + np.cos(t * np.pi))
-            self.last_f = f
-            return f
-    def __call__(self, n, **kwargs):
-        return self.schedule(n, **kwargs)

ultrashape/utils/trainings/mesh.py DELETED Viewed

@@ -1,128 +0,0 @@
-# -*- coding: utf-8 -*-
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import os
-import cv2
-import numpy as np
-import PIL.Image
-from typing import Optional
-import trimesh
-def save_obj(pointnp_px3, facenp_fx3, fname):
-    fid = open(fname, "w")
-    write_str = ""
-    for pidx, p in enumerate(pointnp_px3):
-        pp = p
-        write_str += "v %f %f %f\n" % (pp[0], pp[1], pp[2])
-    for i, f in enumerate(facenp_fx3):
-        f1 = f + 1
-        write_str += "f %d %d %d\n" % (f1[0], f1[1], f1[2])
-    fid.write(write_str)
-    fid.close()
-    return
-def savemeshtes2(pointnp_px3, tcoords_px2, facenp_fx3, facetex_fx3, tex_map, fname):
-    fol, na = os.path.split(fname)
-    na, _ = os.path.splitext(na)
-    matname = "%s/%s.mtl" % (fol, na)
-    fid = open(matname, "w")
-    fid.write("newmtl material_0\n")
-    fid.write("Kd 1 1 1\n")
-    fid.write("Ka 0 0 0\n")
-    fid.write("Ks 0.4 0.4 0.4\n")
-    fid.write("Ns 10\n")
-    fid.write("illum 2\n")
-    fid.write("map_Kd %s.png\n" % na)
-    fid.close()
-    ####
-    fid = open(fname, "w")
-    fid.write("mtllib %s.mtl\n" % na)
-    for pidx, p3 in enumerate(pointnp_px3):
-        pp = p3
-        fid.write("v %f %f %f\n" % (pp[0], pp[1], pp[2]))
-    for pidx, p2 in enumerate(tcoords_px2):
-        pp = p2
-        fid.write("vt %f %f\n" % (pp[0], pp[1]))
-    fid.write("usemtl material_0\n")
-    for i, f in enumerate(facenp_fx3):
-        f1 = f + 1
-        f2 = facetex_fx3[i] + 1
-        fid.write("f %d/%d %d/%d %d/%d\n" % (f1[0], f2[0], f1[1], f2[1], f1[2], f2[2]))
-    fid.close()
-    PIL.Image.fromarray(np.ascontiguousarray(tex_map), "RGB").save(
-        os.path.join(fol, "%s.png" % na))
-    return
-class MeshOutput(object):
-    def __init__(self,
-                 mesh_v: np.ndarray,
-                 mesh_f: np.ndarray,
-                 vertex_colors: Optional[np.ndarray] = None,
-                 uvs: Optional[np.ndarray] = None,
-                 mesh_tex_idx: Optional[np.ndarray] = None,
-                 tex_map: Optional[np.ndarray] = None):
-        self.mesh_v = mesh_v
-        self.mesh_f = mesh_f
-        self.vertex_colors = vertex_colors
-        self.uvs = uvs
-        self.mesh_tex_idx = mesh_tex_idx
-        self.tex_map = tex_map
-    def contain_uv_texture(self):
-        return (self.uvs is not None) and (self.mesh_tex_idx is not None) and (self.tex_map is not None)
-    def contain_vertex_colors(self):
-        return self.vertex_colors is not None
-    def export(self, fname):
-        if self.contain_uv_texture():
-            savemeshtes2(
-                self.mesh_v,
-                self.uvs,
-                self.mesh_f,
-                self.mesh_tex_idx,
-                self.tex_map,
-                fname
-            )
-        elif self.contain_vertex_colors():
-            mesh_obj = trimesh.Trimesh(vertices=self.mesh_v, faces=self.mesh_f, vertex_colors=self.vertex_colors)
-            mesh_obj.export(fname)
-        else:
-            save_obj(
-                self.mesh_v,
-                self.mesh_f,
-                fname
-            )

ultrashape/utils/trainings/mesh_log_callback.py DELETED Viewed

@@ -1,342 +0,0 @@
-# -*- coding: utf-8 -*-
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import json
-import math
-import os
-from typing import Tuple, Generic, Dict, List, Union, Optional
-import trimesh
-import numpy as np
-import pytorch_lightning as pl
-import pytorch_lightning.loggers
-import torch
-import torchvision
-from pytorch_lightning.callbacks import Callback
-from pytorch_lightning.utilities import rank_zero_only
-from hy3dshape.pipelines import export_to_trimesh
-from hy3dshape.utils.trainings.mesh import MeshOutput
-from hy3dshape.utils.visualizers import html_util
-from hy3dshape.utils.visualizers.pythreejs_viewer import PyThreeJSViewer
-class ImageConditionalASLDiffuserLogger(Callback):
-    def __init__(self,
-                 step_frequency: int,
-                 num_samples: int = 1,
-                 mean: Optional[Union[List[float], Tuple[float]]] = None,
-                 std: Optional[Union[List[float], Tuple[float]]] = None,
-                 bounds: Union[List[float], Tuple[float]] = (-1.1, -1.1, -1.1, 1.1, 1.1, 1.1),
-                 **kwargs) -> None:
-        super().__init__()
-        self.bbox_size = np.array(bounds[3:6]) - np.array(bounds[0:3])
-        if mean is not None:
-            mean = np.asarray(mean)
-        if std is not None:
-            std = np.asarray(std)
-        self.mean = mean
-        self.std = std
-        self.step_freq = step_frequency
-        self.num_samples = num_samples
-        self.has_train_logged = False
-        self.logger_log_images = {
-            pl.loggers.WandbLogger: self._wandb,
-        }
-        self.viewer = PyThreeJSViewer(settings={}, render_mode="WEBSITE")
-    @rank_zero_only
-    def _wandb(self, pl_module, images, batch_idx, split):
-        # raise ValueError("No way wandb")
-        grids = dict()
-        for k in images:
-            grid = torchvision.utils.make_grid(images[k])
-            grids[f"{split}/{k}"] = wandb.Image(grid)
-        pl_module.logger.experiment.log(grids)
-    def log_local(self,
-                  outputs: List[List['Latent2MeshOutput']],
-                  images: Union[np.ndarray, List[np.ndarray]],
-                  description: List[str],
-                  keys: List[str],
-                  save_dir: str, split: str,
-                  global_step: int, current_epoch: int, batch_idx: int,
-                  prog_bar: bool = False,
-                  multi_views=None,  # yf ...
-                  ) -> None:
-        folder = "gs-{:010}_e-{:06}_b-{:06}".format(global_step, current_epoch, batch_idx)
-        visual_dir = os.path.join(save_dir, "visuals", split, folder)
-        os.makedirs(visual_dir, exist_ok=True)
-        num_samples = len(images)
-        for i in range(num_samples):
-            key_i = keys[i]
-            image_i = self.denormalize_image(images[i])
-            shape_tag_i = description[i]
-            for j in range(1):
-                mesh = outputs[j][i]
-                if mesh is None:
-                    continue
-                mesh_v = mesh.mesh_v.copy()
-                mesh_v[:, 0] += j * np.max(self.bbox_size)
-                self.viewer.add_mesh(mesh_v, mesh.mesh_f)
-            image_tag = html_util.to_image_embed_tag(image_i)
-            mesh_tag = self.viewer.to_html(html_frame=False)
-            table_tag = f"""
-            <table border = "1">
-                <caption> {shape_tag_i} - {key_i} </caption>
-                <caption> Input Image | Generated Mesh </caption>
-                <tr>
-                    <td>{image_tag}</td>
-                    <td>{mesh_tag}</td>
-                </tr>
-            </table>
-            """
-            if multi_views is not None:
-                multi_views_i = self.make_grid(multi_views[i])
-                views_tag = html_util.to_image_embed_tag(self.denormalize_image(multi_views_i))
-                table_tag = f"""
-                <table border = "1">
-                    <caption> {shape_tag_i} - {key_i} </caption>
-                    <caption> Input Image | Generated Mesh </caption>
-                    <tr>
-                        <td>{image_tag}</td>
-                        <td>{views_tag}</td>
-                        <td>{mesh_tag}</td>
-                    </tr>
-                </table>
-                """
-            html_frame = html_util.to_html_frame(table_tag)
-            if len(key_i) > 100:
-                key_i = key_i[:100]
-            with open(os.path.join(visual_dir, f"{key_i}.html"), "w") as writer:
-                writer.write(html_frame)
-            self.viewer.reset()
-    def log_sample(self,
-                   pl_module: pl.LightningModule,
-                   batch: Dict[str, torch.FloatTensor],
-                   batch_idx: int,
-                   split: str = "train") -> None:
-        """
-        Args:
-            pl_module:
-            batch (dict): the batch sample information, and it contains:
-                 - surface (torch.FloatTensor):
-                 - image (torch.FloatTensor):
-            batch_idx (int):
-            split (str):
-        Returns:
-        """
-        is_train = pl_module.training
-        if is_train:
-            pl_module.eval()
-        batch_size = len(batch["surface"])
-        replace = batch_size < self.num_samples
-        ids = np.random.choice(batch_size, self.num_samples, replace=replace)
-        with torch.no_grad():
-            # run text to mesh
-            # keys = [batch["__key__"][i] for i in ids]
-            keys = [f'key_{i}' for i in ids]
-            # texts = [batch["text"][i] for i in ids]
-            texts = [f'text_{i}'for i in ids]
-            # description = [batch["description"][i] for i in ids]
-            description = [f'desc_{i}_{os.path.splitext(os.path.basename(batch["uid"][i]))[0]}' for i in ids]
-            images = batch["image"][ids]
-            mask_input = batch["mask"][ids] if 'mask' in batch else None
-            # uids = batch["uid"][ids]
-            sample_batch = {
-                "__key__": keys,
-                "image": images,
-                'text': texts,
-                'mask': mask_input,
-            }
-            # if 'cam_parm' in batch:
-            #     sample_batch['cam_parm'] = batch['cam_parm'][ids]
-            # if 'multi_views' in batch:  # yf ...
-            #     sample_batch['multi_views'] = batch['multi_views'][ids]
-            outputs = pl_module.sample(
-                batch=sample_batch,
-                output_type='latents2mesh'
-            )
-            images = images.cpu().float().numpy()
-            # images = self.denormalize_image(images)
-            # images = np.transpose(images, (0, 2, 3, 1))
-            # images = ((images + 1) / 2 * 255).astype(np.uint8)
-        self.log_local(outputs, images, description, keys, pl_module.logger.save_dir, split,
-                       pl_module.global_step, pl_module.current_epoch, batch_idx, prog_bar=False,
-                       multi_views=sample_batch.get('multi_views'))
-        if is_train: pl_module.train()
-    def make_grid(self, images):  # return (3,h,w) in (0,1) ...
-        images_resized = []
-        for img in images:
-            img_resized = torchvision.transforms.functional.resize(img, (320, 320))
-            images_resized.append(img_resized)
-        image = torchvision.utils.make_grid(images_resized, nrow=2, padding=5, pad_value=255)
-        image = image.cpu().numpy()
-        #       image = np.transpose(image, (1, 2, 0))
-        #       image = (image * 255).astype(np.uint8)
-        return image
-    def check_frequency(self, step: int) -> bool:
-        if step % self.step_freq == 0:
-            return True
-        return False
-    def on_train_batch_end(self, trainer: pl.trainer.Trainer, pl_module: pl.LightningModule,
-                           outputs: Generic, batch: Dict[str, torch.FloatTensor], batch_idx: int) -> None:
-        if (self.check_frequency(pl_module.global_step) and  # batch_idx % self.batch_freq == 0
-            hasattr(pl_module, "sample") and
-            callable(pl_module.sample) and
-            self.num_samples > 0):
-            self.log_sample(pl_module, batch, batch_idx, split="train")
-            self.has_train_logged = True
-    def on_validation_batch_end(self, trainer: pl.trainer.Trainer, pl_module: pl.LightningModule,
-                                outputs: Generic, batch: Dict[str, torch.FloatTensor],
-                                dataloader_idx: int, batch_idx: int) -> None:
-        if self.has_train_logged:
-            self.log_sample(pl_module, batch, batch_idx, split="val")
-            self.has_train_logged = False
-    def denormalize_image(self, image):
-        """
-        Args:
-            image (np.ndarray): [3, h, w]
-        Returns:
-            image (np.ndarray): [h, w, 3], np.uint8, [0, 255].
-        """
-        # image = np.transpose(image, (0, 2, 3, 1))
-        image = np.transpose(image, (1, 2, 0))
-        if self.std is not None:
-            image = image * self.std
-        if self.mean is not None:
-            image = image + self.mean
-        image = (image * 255).astype(np.uint8)
-        return image
-class ImageConditionalFixASLDiffuserLogger(Callback):
-    def __init__(
-        self,
-        step_frequency: int,
-        test_data_path: str,
-        max_size: int = None,
-        save_dir: str = 'infer',
-        **kwargs,
-    ) -> None:
-        super().__init__()
-        self.step_freq = step_frequency
-        self.viewer = PyThreeJSViewer(settings={}, render_mode="WEBSITE")
-        self.test_data_path = test_data_path
-        with open(self.test_data_path, 'r') as f:
-            data = json.load(f)
-            self.file_list = data['file_list']
-            # self.file_folder = data['file_folder']
-            if max_size is not None:
-                self.file_list = self.file_list[:max_size]
-        self.kwargs = kwargs
-        self.save_dir = save_dir
-    def on_train_batch_end(
-        self,
-        trainer: pl.trainer.Trainer,
-        pl_module: pl.LightningModule,
-        outputs: Generic,
-        batch: Dict[str, torch.FloatTensor],
-        batch_idx: int,
-    ):
-        if pl_module.global_step % self.step_freq == 0:
-            with open(self.test_data_path, 'r') as f:
-                data = json.load(f)
-                self.file_list = data['file_list']
-            is_train = pl_module.training
-            if is_train:
-                pl_module.eval()
-            # folder_path = self.file_folder
-            # folder_name = os.path.basename(folder_path)
-            folder = "gs-{:010}_e-{:06}_b-{:06}".format(pl_module.global_step, pl_module.current_epoch, batch_idx)
-            visual_dir = os.path.join(pl_module.logger.save_dir, self.save_dir, folder)
-            os.makedirs(visual_dir, exist_ok=True)
-            image_paths = self.file_list
-            chunk_size = math.ceil(len(image_paths) / trainer.world_size)
-            if pl_module.global_rank == trainer.world_size - 1:
-                image_paths = image_paths[pl_module.global_rank * chunk_size:]
-            else:
-                image_paths = image_paths[pl_module.global_rank * chunk_size:(pl_module.global_rank + 1) * chunk_size]
-            print(f'Rank{pl_module.global_rank}: processing {len(image_paths)}|{len(self.file_list)} images')
-            for image_path in image_paths:
-                # if folder_path in image_path:
-                #     save_path = image_path.replace(folder_path, visual_dir)
-                # else:
-                save_path = os.path.join(visual_dir, os.path.basename(image_path))
-                save_path = os.path.splitext(save_path)[0] + '.glb'
-                if isinstance(image_path, str):
-                    print(image_path)
-                with torch.no_grad():
-                    mesh = pl_module.sample(batch={"image": image_path}, **self.kwargs)[0][0]
-                    if isinstance(mesh, tuple) and len(mesh)==2:
-                        mesh = export_to_trimesh(mesh)
-                    elif isinstance(mesh, trimesh.Trimesh):
-                        os.makedirs(os.path.dirname(save_path), exist_ok=True)
-                        mesh.export(save_path)
-            if is_train:
-                pl_module.train()

ultrashape/utils/trainings/peft.py DELETED Viewed

@@ -1,78 +0,0 @@
-# -*- coding: utf-8 -*-
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import os
-from pytorch_lightning.callbacks import Callback
-from omegaconf import OmegaConf, ListConfig
-class PeftSaveCallback(Callback):
-    def __init__(self, peft_model, save_dir: str, save_every_n_steps: int = None):
-        super().__init__()
-        self.peft_model = peft_model
-        self.save_dir = save_dir
-        self.save_every_n_steps = save_every_n_steps
-        os.makedirs(self.save_dir, exist_ok=True)
-    def recursive_convert(self, obj):
-        from omegaconf import OmegaConf, ListConfig
-        if isinstance(obj, (OmegaConf, ListConfig)):
-            return OmegaConf.to_container(obj, resolve=True)
-        elif isinstance(obj, dict):
-            return {k: self.recursive_convert(v) for k, v in obj.items()}
-        elif isinstance(obj, list):
-            return [self.recursive_convert(i) for i in obj]
-        elif isinstance(obj, type):
-            # 避免修改类对象
-            return obj
-        elif hasattr(obj, '__dict__'):
-            for attr_name, attr_value in vars(obj).items():
-                setattr(obj, attr_name, self.recursive_convert(attr_value))
-            return obj
-        else:
-            return obj
-    # def recursive_convert(self, obj):
-    #     if isinstance(obj, (OmegaConf, ListConfig)):
-    #         return OmegaConf.to_container(obj, resolve=True)
-    #     elif isinstance(obj, dict):
-    #         return {k: self.recursive_convert(v) for k, v in obj.items()}
-    #     elif isinstance(obj, list):
-    #         return [self.recursive_convert(i) for i in obj]
-    #     elif hasattr(obj, '__dict__'):
-    #         for attr_name, attr_value in vars(obj).items():
-    #             setattr(obj, attr_name, self.recursive_convert(attr_value))
-    #         return obj
-    #     else:
-    #         return obj
-    def _convert_peft_config(self):
-        pc = self.peft_model.peft_config
-        self.peft_model.peft_config = self.recursive_convert(pc)
-    def on_train_epoch_end(self, trainer, pl_module):
-        self._convert_peft_config()
-        save_path = os.path.join(self.save_dir, f"epoch_{trainer.current_epoch}")
-        self.peft_model.save_pretrained(save_path)
-        print(f"[PeftSaveCallback] Saved LoRA weights to {save_path}")
-    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
-        if self.save_every_n_steps is not None:
-            global_step = trainer.global_step
-            if global_step % self.save_every_n_steps == 0 and global_step > 0:
-                self._convert_peft_config()
-                save_path = os.path.join(self.save_dir, f"step_{global_step}")
-                self.peft_model.save_pretrained(save_path)
-                print(f"[PeftSaveCallback] Saved LoRA weights to {save_path}")

ultrashape/utils/typing.py DELETED Viewed

@@ -1,41 +0,0 @@
-"""
-This module contains type annotations for the project, using
-1. Python type hints (https://docs.python.org/3/library/typing.html) for Python objects
-2. jaxtyping (https://github.com/google/jaxtyping/blob/main/API.md) for PyTorch tensors
-Two types of typing checking can be used:
-1. Static type checking with mypy (install with pip and enabled as the default linter in VSCode)
-2. Runtime type checking with typeguard (install with pip and triggered at runtime, mainly for tensor dtype and shape checking)
-"""
-# Basic types
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    Iterable,
-    List,
-    Literal,
-    NamedTuple,
-    NewType,
-    Optional,
-    Sized,
-    Tuple,
-    Type,
-    TypeVar,
-    Union,
-    Sequence,
-)
-# Tensor dtype
-# for jaxtyping usage, see https://github.com/google/jaxtyping/blob/main/API.md
-from jaxtyping import Bool, Complex, Float, Inexact, Int, Integer, Num, Shaped, UInt
-# Config type
-from omegaconf import DictConfig
-# PyTorch Tensor type
-from torch import Tensor
-# Runtime type checking decorator
-from typeguard import typechecked as typechecker

ultrashape/utils/utils.py DELETED Viewed

@@ -1,128 +0,0 @@
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import logging
-import os
-from functools import wraps
-import torch
-def get_logger(name):
-    logger = logging.getLogger(name)
-    logger.setLevel(logging.INFO)
-    console_handler = logging.StreamHandler()
-    console_handler.setLevel(logging.INFO)
-    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-    console_handler.setFormatter(formatter)
-    logger.addHandler(console_handler)
-    return logger
-logger = get_logger('hy3dgen.shapgen')
-class synchronize_timer:
-    """ Synchronized timer to count the inference time of `nn.Module.forward`.
-        Supports both context manager and decorator usage.
-        Example as context manager:
-        ```python
-        with synchronize_timer('name') as t:
-            run()
-        ```
-        Example as decorator:
-        ```python
-        @synchronize_timer('Export to trimesh')
-        def export_to_trimesh(mesh_output):
-            pass
-        ```
-    """
-    def __init__(self, name=None):
-        self.name = name
-    def __enter__(self):
-        """Context manager entry: start timing."""
-        if os.environ.get('HY3DGEN_DEBUG', '0') == '1':
-            self.start = torch.cuda.Event(enable_timing=True)
-            self.end = torch.cuda.Event(enable_timing=True)
-            self.start.record()
-            return lambda: self.time
-    def __exit__(self, exc_type, exc_value, exc_tb):
-        """Context manager exit: stop timing and log results."""
-        if os.environ.get('HY3DGEN_DEBUG', '0') == '1':
-            self.end.record()
-            torch.cuda.synchronize()
-            self.time = self.start.elapsed_time(self.end)
-            if self.name is not None:
-                logger.info(f'{self.name} takes {self.time} ms')
-    def __call__(self, func):
-        """Decorator: wrap the function to time its execution."""
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            with self:
-                result = func(*args, **kwargs)
-            return result
-        return wrapper
-def smart_load_model(
-    model_path,
-    subfolder,
-    use_safetensors,
-    variant,
-):
-    original_model_path = model_path
-    # try local path
-    base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen')
-    model_fld = os.path.expanduser(os.path.join(base_dir, model_path))
-    model_path = os.path.expanduser(os.path.join(base_dir, model_path, subfolder))
-    logger.info(f'Try to load model from local path: {model_path}')
-    if not os.path.exists(model_path):
-        logger.info('Model path not exists, try to download from huggingface')
-        try:
-            from huggingface_hub import snapshot_download
-            # 只下载指定子目录
-            path = snapshot_download(
-                repo_id=original_model_path,
-                allow_patterns=[f"{subfolder}/*"],  # 关键修改：模式匹配子文件夹
-                local_dir=model_fld
-            )
-            model_path = os.path.join(path, subfolder)  # 保持路径拼接逻辑不变
-        except ImportError:
-            logger.warning(
-                "You need to install HuggingFace Hub to load models from the hub."
-            )
-            raise RuntimeError(f"Model path {model_path} not found")
-        except Exception as e:
-            raise e
-    if not os.path.exists(model_path):
-        raise FileNotFoundError(f"Model path {original_model_path} not found")
-    extension = 'ckpt' if not use_safetensors else 'safetensors'
-    variant = '' if variant is None else f'.{variant}'
-    ckpt_name = f'model{variant}.{extension}'
-    config_path = os.path.join(model_path, 'config.yaml')
-    ckpt_path = os.path.join(model_path, ckpt_name)
-    return config_path, ckpt_path

ultrashape/utils/visualizers/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- # -- coding: utf-8 --

ultrashape/utils/visualizers/color_util.py DELETED Viewed

@@ -1,57 +0,0 @@
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import numpy as np
-import matplotlib.pyplot as plt
-# Helper functions
-def get_colors(inp, colormap="viridis", normalize=True, vmin=None, vmax=None):
-    colormap = plt.cm.get_cmap(colormap)
-    if normalize:
-        vmin = np.min(inp)
-        vmax = np.max(inp)
-    norm = plt.Normalize(vmin, vmax)
-    return colormap(norm(inp))[:, :3]
-def gen_checkers(n_checkers_x, n_checkers_y, width=256, height=256):
-    # tex dims need to be power of two.
-    array = np.ones((width, height, 3), dtype='float32')
-    # width in texels of each checker
-    checker_w = width / n_checkers_x
-    checker_h = height / n_checkers_y
-    for y in range(height):
-        for x in range(width):
-            color_key = int(x / checker_w) + int(y / checker_h)
-            if color_key % 2 == 0:
-                array[x, y, :] = [1., 0.874, 0.0]
-            else:
-                array[x, y, :] = [0., 0., 0.]
-    return array
-def gen_circle(width=256, height=256):
-    xx, yy = np.mgrid[:width, :height]
-    circle = (xx - width / 2 + 0.5) ** 2 + (yy - height / 2 + 0.5) ** 2
-    array = np.ones((width, height, 4), dtype='float32')
-    array[:, :, 0] = (circle <= width)
-    array[:, :, 1] = (circle <= width)
-    array[:, :, 2] = (circle <= width)
-    array[:, :, 3] = circle <= width
-    return array

ultrashape/utils/visualizers/html_util.py DELETED Viewed

@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import io
-import base64
-import numpy as np
-from PIL import Image
-def to_html_frame(content):
-    html_frame = f"""
-    <html>
-      <body>
-        {content}
-      </body>
-    </html>
-    """
-    return html_frame
-def to_single_row_table(caption: str, content: str):
-    table_html = f"""
-    <table border = "1">
-        <caption>{caption}</caption>
-        <tr>
-            <td>{content}</td>
-        </tr>
-    </table>
-    """
-    return table_html
-def to_image_embed_tag(image: np.ndarray):
-    # Convert np.ndarray to bytes
-    img = Image.fromarray(image)
-    raw_bytes = io.BytesIO()
-    img.save(raw_bytes, "PNG")
-    # Encode bytes to base64
-    image_base64 = base64.b64encode(raw_bytes.getvalue()).decode("utf-8")
-    image_tag = f"""
-    <img src="data:image/png;base64,{image_base64}" alt="Embedded Image">
-    """
-    return image_tag

ultrashape/utils/visualizers/pythreejs_viewer.py DELETED Viewed

@@ -1,549 +0,0 @@
-# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
-# except for the third-party components listed below.
-# Hunyuan 3D does not impose any additional limitations beyond what is outlined
-# in the repsective licenses of these third-party components.
-# Users must comply with all terms and conditions of original licenses of these third-party
-# components and must ensure that the usage of the third party components adheres to
-# all relevant laws and regulations.
-# For avoidance of doubts, Hunyuan 3D means the large language models and
-# their software and algorithms, including trained model weights, parameters (including
-# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
-# fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-import numpy as np
-from ipywidgets import embed
-import pythreejs as p3s
-import uuid
-from .color_util import get_colors, gen_circle, gen_checkers
-EMBED_URL = "https://cdn.jsdelivr.net/npm/@jupyter-widgets/html-manager@1.0.1/dist/embed-amd.js"
-class PyThreeJSViewer(object):
-    def __init__(self, settings, render_mode="WEBSITE"):
-        self.render_mode = render_mode
-        self.__update_settings(settings)
-        self._light = p3s.DirectionalLight(color='white', position=[0, 0, 1], intensity=0.6)
-        self._light2 = p3s.AmbientLight(intensity=0.5)
-        self._cam = p3s.PerspectiveCamera(position=[0, 0, 1], lookAt=[0, 0, 0], fov=self.__s["fov"],
-                                          aspect=self.__s["width"] / self.__s["height"], children=[self._light])
-        self._orbit = p3s.OrbitControls(controlling=self._cam)
-        self._scene = p3s.Scene(children=[self._cam, self._light2], background=self.__s["background"])  # "#4c4c80"
-        self._renderer = p3s.Renderer(camera=self._cam, scene=self._scene, controls=[self._orbit],
-                                      width=self.__s["width"], height=self.__s["height"],
-                                      antialias=self.__s["antialias"])
-        self.__objects = {}
-        self.__cnt = 0
-    def jupyter_mode(self):
-        self.render_mode = "JUPYTER"
-    def offline(self):
-        self.render_mode = "OFFLINE"
-    def website(self):
-        self.render_mode = "WEBSITE"
-    def __get_shading(self, shading):
-        shad = {"flat": True, "wireframe": False, "wire_width": 0.03, "wire_color": "black",
-                "side": 'DoubleSide', "colormap": "viridis", "normalize": [None, None],
-                "bbox": False, "roughness": 0.5, "metalness": 0.25, "reflectivity": 1.0,
-                "line_width": 1.0, "line_color": "black",
-                "point_color": "red", "point_size": 0.01, "point_shape": "circle",
-                "text_color": "red"
-                }
-        for k in shading:
-            shad[k] = shading[k]
-        return shad
-    def __update_settings(self, settings={}):
-        sett = {"width": 1600, "height": 800, "antialias": True, "scale": 1.5, "background": "#ffffff",
-                "fov": 30}
-        for k in settings:
-            sett[k] = settings[k]
-        self.__s = sett
-    def __add_object(self, obj, parent=None):
-        if not parent:  # Object is added to global scene and objects dict
-            self.__objects[self.__cnt] = obj
-            self.__cnt += 1
-            self._scene.add(obj["mesh"])
-        else:  # Object is added to parent object and NOT to objects dict
-            parent.add(obj["mesh"])
-        self.__update_view()
-        if self.render_mode == "JUPYTER":
-            return self.__cnt - 1
-        elif self.render_mode == "WEBSITE":
-            return self
-    def __add_line_geometry(self, lines, shading, obj=None):
-        lines = lines.astype("float32", copy=False)
-        mi = np.min(lines, axis=0)
-        ma = np.max(lines, axis=0)
-        geometry = p3s.LineSegmentsGeometry(positions=lines.reshape((-1, 2, 3)))
-        material = p3s.LineMaterial(linewidth=shading["line_width"], color=shading["line_color"])
-        # , vertexColors='VertexColors'),
-        lines = p3s.LineSegments2(geometry=geometry, material=material)  # type='LinePieces')
-        line_obj = {"geometry": geometry, "mesh": lines, "material": material,
-                    "max": ma, "min": mi, "type": "Lines", "wireframe": None}
-        if obj:
-            return self.__add_object(line_obj, obj), line_obj
-        else:
-            return self.__add_object(line_obj)
-    def __update_view(self):
-        if len(self.__objects) == 0:
-            return
-        ma = np.zeros((len(self.__objects), 3))
-        mi = np.zeros((len(self.__objects), 3))
-        for r, obj in enumerate(self.__objects):
-            ma[r] = self.__objects[obj]["max"]
-            mi[r] = self.__objects[obj]["min"]
-        ma = np.max(ma, axis=0)
-        mi = np.min(mi, axis=0)
-        diag = np.linalg.norm(ma - mi)
-        mean = ((ma - mi) / 2 + mi).tolist()
-        scale = self.__s["scale"] * (diag)
-        self._orbit.target = mean
-        self._cam.lookAt(mean)
-        self._cam.position = [mean[0], mean[1], mean[2] + scale]
-        self._light.position = [mean[0], mean[1], mean[2] + scale]
-        self._orbit.exec_three_obj_method('update')
-        self._cam.exec_three_obj_method('updateProjectionMatrix')
-    def __get_bbox(self, v):
-        m = np.min(v, axis=0)
-        M = np.max(v, axis=0)
-        # Corners of the bounding box
-        v_box = np.array([[m[0], m[1], m[2]], [M[0], m[1], m[2]], [M[0], M[1], m[2]], [m[0], M[1], m[2]],
-                          [m[0], m[1], M[2]], [M[0], m[1], M[2]], [M[0], M[1], M[2]], [m[0], M[1], M[2]]])
-        f_box = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4],
-                          [0, 4], [1, 5], [2, 6], [7, 3]], dtype=np.uint32)
-        return v_box, f_box
-    def __get_colors(self, v, f, c, sh):
-        coloring = "VertexColors"
-        if type(c) == np.ndarray and c.size == 3:  # Single color
-            colors = np.ones_like(v)
-            colors[:, 0] = c[0]
-            colors[:, 1] = c[1]
-            colors[:, 2] = c[2]
-            # print("Single colors")
-        elif type(c) == np.ndarray and len(c.shape) == 2 and c.shape[1] == 3:  # Color values for
-            if c.shape[0] == f.shape[0]:  # faces
-                colors = np.hstack([c, c, c]).reshape((-1, 3))
-                coloring = "FaceColors"
-                # print("Face color values")
-            elif c.shape[0] == v.shape[0]:  # vertices
-                colors = c
-                # print("Vertex color values")
-            else:  # Wrong size, fallback
-                print("Invalid color array given! Supported are numpy arrays.", type(c))
-                colors = np.ones_like(v)
-                colors[:, 0] = 1.0
-                colors[:, 1] = 0.874
-                colors[:, 2] = 0.0
-        elif type(c) == np.ndarray and c.size == f.shape[0]:  # Function values for faces
-            normalize = sh["normalize"][0] != None and sh["normalize"][1] != None
-            cc = get_colors(c, sh["colormap"], normalize=normalize,
-                            vmin=sh["normalize"][0], vmax=sh["normalize"][1])
-            # print(cc.shape)
-            colors = np.hstack([cc, cc, cc]).reshape((-1, 3))
-            coloring = "FaceColors"
-            # print("Face function values")
-        elif type(c) == np.ndarray and c.size == v.shape[0]:  # Function values for vertices
-            normalize = sh["normalize"][0] != None and sh["normalize"][1] != None
-            colors = get_colors(c, sh["colormap"], normalize=normalize,
-                                vmin=sh["normalize"][0], vmax=sh["normalize"][1])
-            # print("Vertex function values")
-        else:
-            colors = np.ones_like(v)
-            colors[:, 0] = 1.0
-            colors[:, 1] = 0.874
-            colors[:, 2] = 0.0
-            # No color
-            if c is not None:
-                print("Invalid color array given! Supported are numpy arrays.", type(c))
-        return colors, coloring
-    def __get_point_colors(self, v, c, sh):
-        v_color = True
-        if c is None:  # No color given, use global color
-            # conv = mpl.colors.ColorConverter()
-            colors = sh["point_color"]  # np.array(conv.to_rgb(sh["point_color"]))
-            v_color = False
-        elif isinstance(c, str):  # No color given, use global color
-            # conv = mpl.colors.ColorConverter()
-            colors = c  # np.array(conv.to_rgb(c))
-            v_color = False
-        elif type(c) == np.ndarray and len(c.shape) == 2 and c.shape[0] == v.shape[0] and c.shape[1] == 3:
-            # Point color
-            colors = c.astype("float32", copy=False)
-        elif isinstance(c, np.ndarray) and len(c.shape) == 2 and c.shape[0] == v.shape[0] and c.shape[1] != 3:
-            # Function values for vertices, but the colors are features
-            c_norm = np.linalg.norm(c, ord=2, axis=-1)
-            normalize = sh["normalize"][0] != None and sh["normalize"][1] != None
-            colors = get_colors(c_norm, sh["colormap"], normalize=normalize,
-                                vmin=sh["normalize"][0], vmax=sh["normalize"][1])
-            colors = colors.astype("float32", copy=False)
-        elif type(c) == np.ndarray and c.size == v.shape[0]:  # Function color
-            normalize = sh["normalize"][0] != None and sh["normalize"][1] != None
-            colors = get_colors(c, sh["colormap"], normalize=normalize,
-                                vmin=sh["normalize"][0], vmax=sh["normalize"][1])
-            colors = colors.astype("float32", copy=False)
-            # print("Vertex function values")
-        else:
-            print("Invalid color array given! Supported are numpy arrays.", type(c))
-            colors = sh["point_color"]
-            v_color = False
-        return colors, v_color
-    def add_mesh(self, v, f, c=None, uv=None, n=None, shading={}, texture_data=None, **kwargs):
-        shading.update(kwargs)
-        sh = self.__get_shading(shading)
-        mesh_obj = {}
-        # it is a tet
-        if v.shape[1] == 3 and f.shape[1] == 4:
-            f_tmp = np.ndarray([f.shape[0] * 4, 3], dtype=f.dtype)
-            for i in range(f.shape[0]):
-                f_tmp[i * 4 + 0] = np.array([f[i][1], f[i][0], f[i][2]])
-                f_tmp[i * 4 + 1] = np.array([f[i][0], f[i][1], f[i][3]])
-                f_tmp[i * 4 + 2] = np.array([f[i][1], f[i][2], f[i][3]])
-                f_tmp[i * 4 + 3] = np.array([f[i][2], f[i][0], f[i][3]])
-            f = f_tmp
-        if v.shape[1] == 2:
-            v = np.append(v, np.zeros([v.shape[0], 1]), 1)
-        # Type adjustment vertices
-        v = v.astype("float32", copy=False)
-        # Color setup
-        colors, coloring = self.__get_colors(v, f, c, sh)
-        # Type adjustment faces and colors
-        c = colors.astype("float32", copy=False)
-        # Material and geometry setup
-        ba_dict = {"color": p3s.BufferAttribute(c)}
-        if coloring == "FaceColors":
-            verts = np.zeros((f.shape[0] * 3, 3), dtype="float32")
-            for ii in range(f.shape[0]):
-                # print(ii*3, f[ii])
-                verts[ii * 3] = v[f[ii, 0]]
-                verts[ii * 3 + 1] = v[f[ii, 1]]
-                verts[ii * 3 + 2] = v[f[ii, 2]]
-            v = verts
-        else:
-            f = f.astype("uint32", copy=False).ravel()
-            ba_dict["index"] = p3s.BufferAttribute(f, normalized=False)
-        ba_dict["position"] = p3s.BufferAttribute(v, normalized=False)
-        if uv is not None:
-            uv = (uv - np.min(uv)) / (np.max(uv) - np.min(uv))
-            if texture_data is None:
-                texture_data = gen_checkers(20, 20)
-            tex = p3s.DataTexture(data=texture_data, format="RGBFormat", type="FloatType")
-            material = p3s.MeshStandardMaterial(map=tex, reflectivity=sh["reflectivity"], side=sh["side"],
-                                                roughness=sh["roughness"], metalness=sh["metalness"],
-                                                flatShading=sh["flat"],
-                                                polygonOffset=True, polygonOffsetFactor=1, polygonOffsetUnits=5)
-            ba_dict["uv"] = p3s.BufferAttribute(uv.astype("float32", copy=False))
-        else:
-            material = p3s.MeshStandardMaterial(vertexColors=coloring, reflectivity=sh["reflectivity"],
-                                                side=sh["side"], roughness=sh["roughness"], metalness=sh["metalness"],
-                                                flatShading=sh["flat"],
-                                                polygonOffset=True, polygonOffsetFactor=1, polygonOffsetUnits=5)
-        if type(n) != type(None) and coloring == "VertexColors":  # TODO: properly handle normals for FaceColors as well
-            ba_dict["normal"] = p3s.BufferAttribute(n.astype("float32", copy=False), normalized=True)
-        geometry = p3s.BufferGeometry(attributes=ba_dict)
-        if coloring == "VertexColors" and type(n) == type(None):
-            geometry.exec_three_obj_method('computeVertexNormals')
-        elif coloring == "FaceColors" and type(n) == type(None):
-            geometry.exec_three_obj_method('computeFaceNormals')
-        # Mesh setup
-        mesh = p3s.Mesh(geometry=geometry, material=material)
-        # Wireframe setup
-        mesh_obj["wireframe"] = None
-        if sh["wireframe"]:
-            wf_geometry = p3s.WireframeGeometry(mesh.geometry)  # WireframeGeometry
-            wf_material = p3s.LineBasicMaterial(color=sh["wire_color"], linewidth=sh["wire_width"])
-            wireframe = p3s.LineSegments(wf_geometry, wf_material)
-            mesh.add(wireframe)
-            mesh_obj["wireframe"] = wireframe
-        # Bounding box setup
-        if sh["bbox"]:
-            v_box, f_box = self.__get_bbox(v)
-            _, bbox = self.add_edges(v_box, f_box, sh, mesh)
-            mesh_obj["bbox"] = [bbox, v_box, f_box]
-        # Object setup
-        mesh_obj["max"] = np.max(v, axis=0)
-        mesh_obj["min"] = np.min(v, axis=0)
-        mesh_obj["geometry"] = geometry
-        mesh_obj["mesh"] = mesh
-        mesh_obj["material"] = material
-        mesh_obj["type"] = "Mesh"
-        mesh_obj["shading"] = sh
-        mesh_obj["coloring"] = coloring
-        mesh_obj["arrays"] = [v, f, c]  # TODO replays with proper storage or remove if not needed
-        return self.__add_object(mesh_obj)
-    def add_lines(self, beginning, ending, shading={}, obj=None, **kwargs):
-        shading.update(kwargs)
-        if len(beginning.shape) == 1:
-            if len(beginning) == 2:
-                beginning = np.array([[beginning[0], beginning[1], 0]])
-        else:
-            if beginning.shape[1] == 2:
-                beginning = np.append(
-                    beginning, np.zeros([beginning.shape[0], 1]), 1)
-        if len(ending.shape) == 1:
-            if len(ending) == 2:
-                ending = np.array([[ending[0], ending[1], 0]])
-        else:
-            if ending.shape[1] == 2:
-                ending = np.append(
-                    ending, np.zeros([ending.shape[0], 1]), 1)
-        sh = self.__get_shading(shading)
-        lines = np.hstack([beginning, ending])
-        lines = lines.reshape((-1, 3))
-        return self.__add_line_geometry(lines, sh, obj)
-    def add_edges(self, vertices, edges, shading={}, obj=None, **kwargs):
-        shading.update(kwargs)
-        if vertices.shape[1] == 2:
-            vertices = np.append(
-                vertices, np.zeros([vertices.shape[0], 1]), 1)
-        sh = self.__get_shading(shading)
-        lines = np.zeros((edges.size, 3))
-        cnt = 0
-        for e in edges:
-            lines[cnt, :] = vertices[e[0]]
-            lines[cnt + 1, :] = vertices[e[1]]
-            cnt += 2
-        return self.__add_line_geometry(lines, sh, obj)
-    def add_points(self, points, c=None, shading={}, obj=None, **kwargs):
-        shading.update(kwargs)
-        if len(points.shape) == 1:
-            if len(points) == 2:
-                points = np.array([[points[0], points[1], 0]])
-        else:
-            if points.shape[1] == 2:
-                points = np.append(
-                    points, np.zeros([points.shape[0], 1]), 1)
-        sh = self.__get_shading(shading)
-        points = points.astype("float32", copy=False)
-        mi = np.min(points, axis=0)
-        ma = np.max(points, axis=0)
-        g_attributes = {"position": p3s.BufferAttribute(points, normalized=False)}
-        m_attributes = {"size": sh["point_size"]}
-        if sh["point_shape"] == "circle":  # Plot circles
-            tex = p3s.DataTexture(data=gen_circle(16, 16), format="RGBAFormat", type="FloatType")
-            m_attributes["map"] = tex
-            m_attributes["alphaTest"] = 0.5
-            m_attributes["transparency"] = True
-        else:  # Plot squares
-            pass
-        colors, v_colors = self.__get_point_colors(points, c, sh)
-        if v_colors:  # Colors per point
-            m_attributes["vertexColors"] = 'VertexColors'
-            g_attributes["color"] = p3s.BufferAttribute(colors, normalized=False)
-        else:  # Colors for all points
-            m_attributes["color"] = colors
-        material = p3s.PointsMaterial(**m_attributes)
-        geometry = p3s.BufferGeometry(attributes=g_attributes)
-        points = p3s.Points(geometry=geometry, material=material)
-        point_obj = {"geometry": geometry, "mesh": points, "material": material,
-                     "max": ma, "min": mi, "type": "Points", "wireframe": None}
-        if obj:
-            return self.__add_object(point_obj, obj), point_obj
-        else:
-            return self.__add_object(point_obj)
-    def remove_object(self, obj_id):
-        if obj_id not in self.__objects:
-            print("Invalid object id. Valid ids are: ", list(self.__objects.keys()))
-            return
-        self._scene.remove(self.__objects[obj_id]["mesh"])
-        del self.__objects[obj_id]
-        self.__update_view()
-    def reset(self):
-        for obj_id in list(self.__objects.keys()).copy():
-            self._scene.remove(self.__objects[obj_id]["mesh"])
-            del self.__objects[obj_id]
-        self.__update_view()
-    def update_object(self, oid=0, vertices=None, colors=None, faces=None):
-        obj = self.__objects[oid]
-        if type(vertices) != type(None):
-            if obj["coloring"] == "FaceColors":
-                f = obj["arrays"][1]
-                verts = np.zeros((f.shape[0] * 3, 3), dtype="float32")
-                for ii in range(f.shape[0]):
-                    # print(ii*3, f[ii])
-                    verts[ii * 3] = vertices[f[ii, 0]]
-                    verts[ii * 3 + 1] = vertices[f[ii, 1]]
-                    verts[ii * 3 + 2] = vertices[f[ii, 2]]
-                v = verts
-            else:
-                v = vertices.astype("float32", copy=False)
-            obj["geometry"].attributes["position"].array = v
-            # self.wireframe.attributes["position"].array = v # Wireframe updates?
-            obj["geometry"].attributes["position"].needsUpdate = True
-        #           obj["geometry"].exec_three_obj_method('computeVertexNormals')
-        if type(colors) != type(None):
-            colors, coloring = self.__get_colors(obj["arrays"][0], obj["arrays"][1], colors, obj["shading"])
-            colors = colors.astype("float32", copy=False)
-            obj["geometry"].attributes["color"].array = colors
-            obj["geometry"].attributes["color"].needsUpdate = True
-        if type(faces) != type(None):
-            if obj["coloring"] == "FaceColors":
-                print("Face updates are currently only possible in vertex color mode.")
-                return
-            f = faces.astype("uint32", copy=False).ravel()
-            print(obj["geometry"].attributes)
-            obj["geometry"].attributes["index"].array = f
-            # self.wireframe.attributes["position"].array = v # Wireframe updates?
-            obj["geometry"].attributes["index"].needsUpdate = True
-        #            obj["geometry"].exec_three_obj_method('computeVertexNormals')
-        # self.mesh.geometry.verticesNeedUpdate = True
-        # self.mesh.geometry.elementsNeedUpdate = True
-        # self.update()
-        if self.render_mode == "WEBSITE":
-            return self
-    #    def update(self):
-    #        self.mesh.exec_three_obj_method('update')
-    #        self.orbit.exec_three_obj_method('update')
-    #        self.cam.exec_three_obj_method('updateProjectionMatrix')
-    #        self.scene.exec_three_obj_method('update')
-    def add_text(self, text, shading={}, **kwargs):
-        shading.update(kwargs)
-        sh = self.__get_shading(shading)
-        tt = p3s.TextTexture(string=text, color=sh["text_color"])
-        sm = p3s.SpriteMaterial(map=tt)
-        text = p3s.Sprite(material=sm, scaleToTexture=True)
-        self._scene.add(text)
-    # def add_widget(self, widget, callback):
-    #    self.widgets.append(widget)
-    #    widget.observe(callback, names='value')
-    #    def add_dropdown(self, options, default, desc, cb):
-    #        widget = widgets.Dropdown(options=options, value=default, description=desc)
-    #        self.__widgets.append(widget)
-    #        widget.observe(cb, names="value")
-    #        display(widget)
-    #    def add_button(self, text, cb):
-    #        button = widgets.Button(description=text)
-    #        self.__widgets.append(button)
-    #        button.on_click(cb)
-    #        display(button)
-    def to_html(self, imports=True, html_frame=True):
-        # Bake positions (fixes centering bug in offline rendering)
-        if len(self.__objects) == 0:
-            return
-        ma = np.zeros((len(self.__objects), 3))
-        mi = np.zeros((len(self.__objects), 3))
-        for r, obj in enumerate(self.__objects):
-            ma[r] = self.__objects[obj]["max"]
-            mi[r] = self.__objects[obj]["min"]
-        ma = np.max(ma, axis=0)
-        mi = np.min(mi, axis=0)
-        diag = np.linalg.norm(ma - mi)
-        mean = (ma - mi) / 2 + mi
-        for r, obj in enumerate(self.__objects):
-            v = self.__objects[obj]["geometry"].attributes["position"].array
-            v -= mean
-            # v += np.array([0.0, .9, 0.0]) #! to move the obj to the center of window
-        scale = self.__s["scale"] * (diag)
-        self._orbit.target = [0.0, 0.0, 0.0]
-        self._cam.lookAt([0.0, 0.0, 0.0])
-        # self._cam.position = [0.0, 0.0, scale]
-        self._cam.position = [0.0, 0.5, scale * 1.3] #! show four complete meshes in the window
-        self._light.position = [0.0, 0.0, scale]
-        state = embed.dependency_state(self._renderer)
-        # Somehow these entries are missing when the state is exported in python.
-        # Exporting from the GUI works, so we are inserting the missing entries.
-        for k in state:
-            if state[k]["model_name"] == "OrbitControlsModel":
-                state[k]["state"]["maxAzimuthAngle"] = "inf"
-                state[k]["state"]["maxDistance"] = "inf"
-                state[k]["state"]["maxZoom"] = "inf"
-                state[k]["state"]["minAzimuthAngle"] = "-inf"
-        tpl = embed.load_requirejs_template
-        if not imports:
-            embed.load_requirejs_template = ""
-        s = embed.embed_snippet(self._renderer, state=state, embed_url=EMBED_URL)
-        # s = embed.embed_snippet(self.__w, state=state)
-        embed.load_requirejs_template = tpl
-        if html_frame:
-            s = "<html>\n<body>\n" + s + "\n</body>\n</html>"
-        # Revert changes
-        for r, obj in enumerate(self.__objects):
-            v = self.__objects[obj]["geometry"].attributes["position"].array
-            v += mean
-        self.__update_view()
-        return s
-    def save(self, filename=""):
-        if filename == "":
-            uid = str(uuid.uuid4()) + ".html"
-        else:
-            filename = filename.replace(".html", "")
-            uid = filename + '.html'
-        with open(uid, "w") as f:
-            f.write(self.to_html())
-        print("Plot saved to file %s." % uid)

ultrashape/utils/voxelize.py DELETED Viewed

@@ -1,74 +0,0 @@
-import torch
-def voxelize_from_point(pc, num_latents, resolution=128):
-    B, N, D = pc.shape
-    device = pc.device
-    norm_pc = (pc + 1.0) / 2.0
-    voxel_indices = torch.floor(norm_pc * resolution).long()
-    voxel_indices = torch.clamp(voxel_indices, 0, resolution - 1) # (B, N, 3)
-    batch_idx = torch.arange(B, device=device).view(B, 1).expand(B, N)
-    flat_indices = torch.cat([batch_idx.unsqueeze(-1), voxel_indices], dim=-1).view(-1, 4)
-    unique_voxels = torch.unique(flat_indices, dim=0)
-    u_batch_ids = unique_voxels[:, 0]
-    noise = torch.rand_like(u_batch_ids, dtype=torch.float)
-    sort_keys = u_batch_ids.float() + noise
-    perm = torch.argsort(sort_keys)
-    shuffled_voxels = unique_voxels[perm]
-    shuffled_batch_ids = shuffled_voxels[:, 0].contiguous()
-    counts = torch.bincount(shuffled_batch_ids, minlength=B)
-    min_count = counts.min().item()
-    # Always aim for num_latents
-    actual_k = num_latents
-    if min_count < num_latents:
-        print(f"[Info] Voxel count ({min_count}) < Target ({num_latents}). Sampling with replacement.")
-        # If we don't have enough unique voxels, we need to sample with replacement/repetition
-        # We can just repeat the indices to fill the gap
-        batch_starts = torch.searchsorted(shuffled_batch_ids, torch.arange(B, device=device))
-        # Create gathering indices that wrap around for each batch
-        # For each batch element i, we want actual_k indices
-        # They start at batch_starts[i] and go up to batch_starts[i] + counts[i]
-        # We use modulo to wrap around: (j % counts[i]) + batch_starts[i]
-        # Expand for broadcasting
-        batch_starts_exp = batch_starts.unsqueeze(1) # [B, 1]
-        counts_exp = counts.unsqueeze(1) # [B, 1]
-        offsets = torch.arange(actual_k, device=device).unsqueeze(0) # [1, K]
-        # Calculate offsets modulo the available count for each batch
-        # This effectively repeats the available voxels to fill the desired size
-        # We need to be careful about division by zero if a batch has 0 voxels (shouldn't happen with valid PC)
-        counts_exp = torch.maximum(counts_exp, torch.tensor(1, device=device))
-        wrapped_offsets = offsets % counts_exp
-        gather_indices = batch_starts_exp + wrapped_offsets
-        gather_indices = gather_indices.view(-1)
-    else:
-        # Standard case: enough points, just take the first k
-        batch_starts = torch.searchsorted(shuffled_batch_ids, torch.arange(B, device=device))
-        offsets = torch.arange(actual_k, device=device).unsqueeze(0)
-        gather_indices = batch_starts.unsqueeze(1) + offsets
-        gather_indices = gather_indices.view(-1)
-    selected_indices = shuffled_voxels[gather_indices]
-    final_grid_coords = selected_indices[:, 1:]
-    # Grid Index -> Voxel Center
-    voxel_size = 2.0 / resolution
-    final_centers = (final_grid_coords.float() + 0.5) * voxel_size - 1.0
-    sampled_pc = final_centers.view(B, actual_k, 3)
-    sampled_indices = final_grid_coords.view(B, actual_k, 3)
-    return sampled_pc, sampled_indices