Spaces:

VAST-AI
/

TripoSplat

Running on Zero

App Files Files Community

Delete triposplat.py

by rapidrattle - opened 25 days ago

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

-598

Files changed (1) hide show

triposplat.py +0 -598

triposplat.py DELETED Viewed

@@ -1,598 +0,0 @@
-import numpy as np
-import torch
-import torch.nn.functional as F
-import safetensors.torch
-from PIL import Image, ImageFilter
-from torchvision import transforms
-from tqdm.auto import tqdm
-from model import (
-    DinoV3ViT, Flux2VAEEncoder, BiRefNet,
-    OctreeProbabilityFixedlenDecoder, ElasticGaussianFixedlenDecoder,
-    LatentSeqMMFlowModel, OctreeGaussianDecoder,
-)
-# ---------------------------------------------------------------------------
-# Gaussian
-# ---------------------------------------------------------------------------
-class Gaussian:
-    """A set of 3D Gaussian splats with binary PLY / `.splat` export.
-    Raw (pre-activation) tensors are assigned through `_xyz`, `_features_dc`,
-    `_opacity`, `_scaling` and `_rotation`. The `_xyz`, `_opacity` and `_scaling`
-    setters also cache the activated values returned by `get_xyz`,
-    `get_opacity` and `get_scaling`.
-    """
-    # 3x3 matrix applied by the exporters when the caller passes no transform.
-    _DEFAULT_TRANSFORM = [[1, 0, 0], [0, 0, -1], [0, 1, 0]]
-    def __init__(self, aabb: list, sh_degree: int = 0, mininum_kernel_size: float = 0.0,
-                 scaling_bias: float = 0.01, opacity_bias: float = 0.1,
-                 scaling_activation: str = "exp", device='cuda'):
-        """
-        Args:
-            aabb: Six numbers; raw positions map to output positions as
-                `raw * aabb[3:] + aabb[:3]`.
-            sh_degree: Stored as-is; not otherwise used by this class.
-            mininum_kernel_size: Added in quadrature to every activated scale.
-            scaling_bias: Activated scale produced by a raw scaling of zero
-                (before `mininum_kernel_size` is mixed in).
-            opacity_bias: Activated opacity produced by a raw opacity of zero.
-            scaling_activation: `"exp"` or `"softplus"`.
-            device: Device holding the bounding box and the bias tensors.
-        Raises:
-            ValueError: If `scaling_activation` is not a supported name.
-        """
-        if scaling_activation == "exp":
-            self._scaling_activation = torch.exp
-            self._inverse_scaling_activation = torch.log
-        elif scaling_activation == "softplus":
-            self._scaling_activation = F.softplus
-            self._inverse_scaling_activation = lambda x: x + torch.log(-torch.expm1(-x))
-        else:
-            # Fail here with a clear message instead of an AttributeError a few lines below.
-            raise ValueError(
-                f"unsupported scaling_activation: {scaling_activation!r} (expected 'exp' or 'softplus')"
-            )
-        self.sh_degree = sh_degree
-        self.mininum_kernel_size = mininum_kernel_size
-        self.scaling_bias = scaling_bias
-        self.opacity_bias = opacity_bias
-        self.device = device
-        self.aabb = torch.tensor(aabb, dtype=torch.float32, device=device)
-        self._opacity_activation = torch.sigmoid
-        self._inverse_opacity_activation = lambda x: torch.log(x / (1 - x))
-        # Biases live in pre-activation space so that a raw value of zero activates to the bias.
-        self.scale_bias = self._inverse_scaling_activation(torch.tensor(self.scaling_bias)).to(self.device)
-        self.rots_bias = torch.zeros(4, device=self.device)
-        self.rots_bias[0] = 1
-        self.opacity_bias_val = self._inverse_opacity_activation(torch.tensor(self.opacity_bias)).to(self.device)
-        self._storage = {}
-    def _get_store(self, name):
-        """Return the stored tensor called `name`, or None if it was never set."""
-        return self._storage.get(name)
-    def _set_store(self, name, value):
-        """Store `value` under `name`."""
-        self._storage[name] = value
-    @property
-    def _xyz(self):
-        return self._get_store("_xyz")
-    @_xyz.setter
-    def _xyz(self, value):
-        if value is None:
-            self._set_store("_xyz", None)
-            self._set_store("xyz", None)
-            return
-        self._set_store("_xyz", value)
-        self._set_store("xyz", value * self.aabb[None, 3:] + self.aabb[None, :3])
-    @property
-    def get_xyz(self):
-        """Positions after the bounding-box mapping (None until `_xyz` is set)."""
-        return self._get_store("xyz")
-    @property
-    def _features_dc(self):
-        return self._get_store("_features_dc")
-    @_features_dc.setter
-    def _features_dc(self, value):
-        self._set_store("_features_dc", value)
-    @property
-    def _opacity(self):
-        return self._get_store("_opacity")
-    @_opacity.setter
-    def _opacity(self, value):
-        if value is None:
-            self._set_store("_opacity", None)
-            self._set_store("opacity", None)
-            return
-        self._set_store("_opacity", value)
-        self._set_store("opacity", self._opacity_activation(value + self.opacity_bias_val))
-    @property
-    def get_opacity(self):
-        """Sigmoid-activated opacity (None until `_opacity` is set)."""
-        return self._get_store("opacity")
-    @property
-    def _scaling(self):
-        return self._get_store("_scaling")
-    @_scaling.setter
-    def _scaling(self, value):
-        if value is None:
-            self._set_store("_scaling", None)
-            self._set_store("scaling", None)
-            return
-        self._set_store("_scaling", value)
-        activated = self._scaling_activation(value + self.scale_bias)
-        # Combine with the minimum kernel size in quadrature: sqrt(s^2 + k^2).
-        self._set_store("scaling", torch.sqrt(torch.square(activated) + self.mininum_kernel_size ** 2))
-    @property
-    def get_scaling(self):
-        """Activated scale including the minimum kernel size (None until `_scaling` is set)."""
-        return self._get_store("scaling")
-    @property
-    def _rotation(self):
-        return self._get_store("_rotation")
-    @_rotation.setter
-    def _rotation(self, value):
-        self._set_store("_rotation", value)
-    def construct_list_of_attributes(self):
-        """Return the PLY property names in the column order used by `to_ply_bytes`."""
-        names = ['x', 'y', 'z', 'nx', 'ny', 'nz']
-        dc = self._features_dc
-        names.extend(f'f_dc_{i}' for i in range(dc.shape[1] * dc.shape[2]))
-        names.append('opacity')
-        names.extend(f'scale_{i}' for i in range(self._scaling.shape[1]))
-        names.extend(f'rot_{i}' for i in range(self._rotation.shape[1]))
-        return names
-    @staticmethod
-    def _apply_transform(xyz, rotation, transform):
-        """Apply a 3x3 matrix to positions and to the matching orientation quaternions."""
-        xyz = np.matmul(xyz, transform.T)
-        rotation = _matrix_to_quat(np.matmul(transform, _quat_to_matrix(rotation)))
-        return xyz, rotation
-    def _get_ply_data(self, transform=None):
-        """Collect the per-splat numpy arrays written to a PLY file.
-        Opacity and scale are converted back to pre-activation values (logit and
-        log). With `transform=None` no transform is applied at all; the default
-        transform is only substituted by the public exporters.
-        """
-        xyz = self.get_xyz.detach().cpu().numpy()
-        normals = np.zeros_like(xyz)
-        f_dc = self._features_dc.detach().transpose(1, 2).flatten(start_dim=1).contiguous().cpu().numpy()
-        opacities = self._inverse_opacity_activation(self.get_opacity).detach().cpu().numpy()
-        scale = torch.log(self.get_scaling).detach().cpu().numpy()
-        rotation = (self._rotation + self.rots_bias[None, :]).detach().cpu().numpy()
-        if transform is not None:
-            xyz, rotation = self._apply_transform(xyz, rotation, np.array(transform))
-        return xyz, normals, f_dc, opacities, scale, rotation
-    def _transformed_xyz_rot(self, transform=None):
-        """Return float32-transformed `(xyz, rotation)`; None selects `_DEFAULT_TRANSFORM`."""
-        if transform is None:
-            transform = self._DEFAULT_TRANSFORM
-        transform = np.array(transform, dtype=np.float32)
-        xyz = self.get_xyz.detach().cpu().numpy().astype(np.float32)
-        rotation = (self._rotation + self.rots_bias[None, :]).detach().cpu().numpy()
-        return self._apply_transform(xyz, rotation, transform)
-    def to_ply_bytes(self, transform=None) -> bytes:
-        """Serialize the splats as a binary little-endian PLY file and return its bytes."""
-        if transform is None:
-            transform = self._DEFAULT_TRANSFORM
-        columns = self._get_ply_data(transform=transform)
-        dtype_full = [(attr, 'f4') for attr in self.construct_list_of_attributes()]
-        elements = np.empty(columns[0].shape[0], dtype=dtype_full)
-        # Every field is 'f4', so the record array can be filled through a plain
-        # float32 view in one vectorized copy instead of one Python tuple per splat.
-        flat = elements.view(np.float32).reshape(len(elements), len(dtype_full))
-        flat[...] = np.concatenate(columns, axis=1)
-        return _binary_ply_bytes(elements, dtype_full)
-    def to_splat_bytes(self, transform=None) -> bytes:
-        """Serialize the splats in the 32-bytes-per-record `.splat` layout and return the bytes."""
-        if transform is None:
-            transform = self._DEFAULT_TRANSFORM
-        xyz, rotation = self._transformed_xyz_rot(transform=transform)
-        scale = self.get_scaling.detach().cpu().numpy().astype(np.float32)
-        opacity = self.get_opacity.detach().cpu().numpy()
-        f_dc = self._features_dc.detach().cpu().numpy()
-        C0 = 0.28209479177387814
-        # .splat packs color as 4 bytes RGBA: RGB from the SH DC term, A from opacity.
-        rgb = np.clip((f_dc[:, 0, :] * C0 + 0.5) * 255, 0, 255).astype(np.uint8)
-        alpha = np.clip(opacity[:, 0:1] * 255, 0, 255).astype(np.uint8)
-        rgba = np.concatenate([rgb, alpha], axis=1)
-        # Unit quaternion components in [-1, 1] are quantized to one byte each.
-        rot = rotation / np.linalg.norm(rotation, axis=-1, keepdims=True)
-        rot_u8 = np.clip(rot * 128 + 128, 0, 255).astype(np.uint8)
-        # Records are written in descending order of opacity * product of scales.
-        order = np.argsort(-opacity[:, 0] * np.prod(scale, axis=-1))
-        xyz, scale, rgba, rot_u8 = xyz[order], scale[order], rgba[order], rot_u8[order]
-        # Per-splat record is exactly 32 bytes: xyz(12) + scale(12) + rgba(4) + rot(4).
-        data = np.concatenate([
-            xyz.astype(np.float32).view(np.uint8).reshape(-1, 12),
-            scale.astype(np.float32).view(np.uint8).reshape(-1, 12),
-            rgba.reshape(-1, 4),
-            rot_u8.reshape(-1, 4),
-        ], axis=1).reshape(-1)
-        return data.tobytes()
-    def save_ply(self, path, transform=None):
-        """Write `to_ply_bytes(transform)` to `path`."""
-        with open(path, 'wb') as f:
-            f.write(self.to_ply_bytes(transform=transform))
-    def save_splat(self, path, transform=None):
-        """Write `to_splat_bytes(transform)` to `path`."""
-        with open(path, 'wb') as f:
-            f.write(self.to_splat_bytes(transform=transform))
-def _binary_ply_bytes(elements, dtype_full) -> bytes:
-    """Return a binary little-endian PLY file: ASCII header followed by the raw record bytes.
-    `dtype_full` is a sequence of `(property_name, numpy_type_code)` pairs; codes
-    without a PLY name in the table below are written to the header unchanged.
-    """
-    ply_names = {'f4': 'float', 'u1': 'uchar', 'i4': 'int'}
-    lines = ["ply", "format binary_little_endian 1.0", f"element vertex {len(elements)}"]
-    lines.extend(f"property {ply_names.get(code, code)} {field}" for field, code in dtype_full)
-    lines.append("end_header")
-    header = "\n".join(lines) + "\n"
-    return header.encode('ascii') + elements.tobytes()
-def _quat_to_matrix(q):
-    """Convert `(N, 4)` quaternions ordered `(w, x, y, z)` into `(N, 3, 3)` rotation matrices.
-    Each quaternion is normalized first, so inputs need not be unit length.
-    """
-    unit = q / np.linalg.norm(q, axis=-1, keepdims=True)
-    w, x, y, z = (unit[:, i] for i in range(4))
-    rows = (
-        (1 - 2*(y*y + z*z), 2*(x*y - w*z),     2*(x*z + w*y)),
-        (2*(x*y + w*z),     1 - 2*(x*x + z*z), 2*(y*z - w*x)),
-        (2*(x*z - w*y),     2*(y*z + w*x),     1 - 2*(x*x + y*y)),
-    )
-    # Stack columns within each row, then the three rows, giving index order [n, row, col].
-    return np.stack([np.stack(row, axis=-1) for row in rows], axis=-2)
-def _matrix_to_quat(R):
-    """Convert `(N, 3, 3)` rotation matrices to unit quaternions ordered `(w, x, y, z)`.
-    For each matrix the quaternion component with the largest magnitude is
-    recovered first (from the trace when it is positive, otherwise from the
-    largest diagonal entry) and the other three are derived from it. For
-    finite input the divisor is therefore always at least 2, which keeps
-    rotations close to 180 degrees accurate; dividing by a near-zero
-    `sqrt(trace + 1)` there would only amplify rounding noise.
-    Returns an `(N, 4)` array of normalized quaternions with `w >= 0`.
-    """
-    R = np.asarray(R)
-    dtype = R.dtype if np.issubdtype(R.dtype, np.floating) else np.float64
-    R = R.astype(dtype, copy=False)
-    d0, d1, d2 = R[:, 0, 0], R[:, 1, 1], R[:, 2, 2]
-    trace = d0 + d1 + d2
-    # Mutually exclusive masks selecting which component is solved for first.
-    use_w = trace > 0
-    use_x = ~use_w & (d0 >= d1) & (d0 >= d2)
-    use_y = ~use_w & ~use_x & (d1 >= d2)
-    use_z = ~(use_w | use_x | use_y)
-    q = np.zeros((R.shape[0], 4), dtype=dtype)
-    m = use_w
-    s = np.sqrt(np.maximum(1 + trace[m], 0)) * 2  # s == 4 * w
-    q[m, 0] = 0.25 * s
-    q[m, 1] = (R[m, 2, 1] - R[m, 1, 2]) / s
-    q[m, 2] = (R[m, 0, 2] - R[m, 2, 0]) / s
-    q[m, 3] = (R[m, 1, 0] - R[m, 0, 1]) / s
-    m = use_x
-    s = np.sqrt(np.maximum(1 + d0[m] - d1[m] - d2[m], 0)) * 2  # s == 4 * x
-    q[m, 0] = (R[m, 2, 1] - R[m, 1, 2]) / s
-    q[m, 1] = 0.25 * s
-    q[m, 2] = (R[m, 0, 1] + R[m, 1, 0]) / s
-    q[m, 3] = (R[m, 0, 2] + R[m, 2, 0]) / s
-    m = use_y
-    s = np.sqrt(np.maximum(1 + d1[m] - d0[m] - d2[m], 0)) * 2  # s == 4 * y
-    q[m, 0] = (R[m, 0, 2] - R[m, 2, 0]) / s
-    q[m, 1] = (R[m, 0, 1] + R[m, 1, 0]) / s
-    q[m, 2] = 0.25 * s
-    q[m, 3] = (R[m, 1, 2] + R[m, 2, 1]) / s
-    m = use_z
-    s = np.sqrt(np.maximum(1 + d2[m] - d0[m] - d1[m], 0)) * 2  # s == 4 * z
-    q[m, 0] = (R[m, 1, 0] - R[m, 0, 1]) / s
-    q[m, 1] = (R[m, 0, 2] + R[m, 2, 0]) / s
-    q[m, 2] = (R[m, 1, 2] + R[m, 2, 1]) / s
-    q[m, 3] = 0.25 * s
-    # q and -q encode the same rotation; keep the representative with w >= 0.
-    q[q[:, 0] < 0] *= -1
-    return q / np.linalg.norm(q, axis=-1, keepdims=True)
-def _build_gaussians(decoder: ElasticGaussianFixedlenDecoder, points_pred: dict, pred: dict):
-    """Build one `Gaussian` per batch item from decoder outputs.
-    Args:
-        decoder: Supplies `rep_config`, `layout` and `_get_offset`.
-        points_pred: Dict whose `"points"` entry holds the per-item anchor points.
-        pred: Dict whose `"features"` entry holds the per-point feature rows that
-            are sliced into Gaussian attributes according to `decoder.layout`.
-    Returns:
-        A list with one `Gaussian` per leading index of `pred["features"]`.
-    """
-    features = pred["features"]
-    offset = decoder._get_offset(features)
-    cfg = decoder.rep_config
-    gaussians = []
-    for i in range(features.shape[0]):
-        g = Gaussian(
-            sh_degree=0,
-            aabb=[-0.5, -0.5, -0.5, 1.0, 1.0, 1.0],
-            mininum_kernel_size=cfg['filter_kernel_size_3d'],
-            scaling_bias=cfg['scaling_bias'],
-            opacity_bias=cfg['opacity_bias'],
-            scaling_activation=cfg['scaling_activation'],
-            # Keep the bias / aabb tensors on the features' device instead of the
-            # hard-coded 'cuda' default, so CPU and non-default GPUs work too.
-            device=features.device,
-        )
-        anchors = points_pred["points"][i, :, None, :]
-        for k, v in decoder.layout.items():
-            if k == '_xyz':
-                # Positions are the anchor points plus the decoded per-Gaussian offsets.
-                setattr(g, k, (offset[i] + anchors).flatten(0, 1))
-            elif k in ('_xyz_center', '_offset_scale'):
-                continue
-            else:
-                lo, hi = v['range'][0], v['range'][1]
-                feats = features[i][:, lo:hi].reshape(-1, *v['shape']).flatten(0, 1)
-                setattr(g, k, feats * cfg['lr'][k])
-        gaussians.append(g)
-    return gaussians
-# ---------------------------------------------------------------------------
-# Euler flow sampler
-# ---------------------------------------------------------------------------
-class FlowEulerCfgSampler:
-    """Euler integrator for flow-matching models with optional classifier-free guidance.
-    States and predictions may be a single tensor or a dict of tensors.
-    """
-    def __init__(self, sigma_min: float = 1e-5):
-        # Stored for callers; none of the methods in this class read it.
-        self.sigma_min = sigma_min
-    def _get_batch_size(self, x_t):
-        """Leading dimension of `x_t` (of its first value when `x_t` is a dict)."""
-        return next(iter(x_t.values())).shape[0] if isinstance(x_t, dict) else x_t.shape[0]
-    def _get_device(self, x_t):
-        """Device of `x_t` (of its first value when `x_t` is a dict)."""
-        return next(iter(x_t.values())).device if isinstance(x_t, dict) else x_t.device
-    def _inference_model(self, model, x_t, t, cond=None):
-        """Call `model(x_t, 1000 * t, cond)` after broadcasting batch-1 conditioning.
-        The caller's `cond` is never modified: expanded tensors go into a new dict.
-        """
-        batch = self._get_batch_size(x_t)
-        device = self._get_device(x_t)
-        t_scaled = torch.tensor([1000 * t] * batch, device=device, dtype=torch.float32)
-        if isinstance(cond, dict):
-            cond = {
-                k: (v.repeat(batch, *([1] * (len(v.shape) - 1)))
-                    if isinstance(v, torch.Tensor) and v.shape[0] == 1 and batch > 1 else v)
-                for k, v in cond.items()
-            }
-        elif cond is not None and cond.shape[0] == 1 and batch > 1:
-            cond = cond.repeat(batch, *([1] * (len(cond.shape) - 1)))
-        return model(x_t, t_scaled, cond)
-    def _cfg_prediction(self, model, x_t, t, cond, neg_cond, guidance_scale):
-        """Return the (optionally guided) velocity prediction at time `t`.
-        Diffusers-style convention: a `guidance_scale` of None or <= 1 means no
-        CFG, so only the conditional pass runs. A scale `s > 1` blends as
-        `s * cond - (s - 1) * uncond`. `guidance_scale` may also be a dict of
-        per-key scales (missing keys default to 1.0); that form requires the
-        model to return a dict.
-        Raises:
-            TypeError: If per-key scales are given but the model returns a tensor.
-        """
-        pred_v = self._inference_model(model, x_t, t, cond)
-        per_key = isinstance(guidance_scale, dict)
-        if per_key:
-            if not any(s > 1 for s in guidance_scale.values()):
-                return pred_v
-            if not isinstance(pred_v, dict):
-                raise TypeError("a per-key guidance_scale requires the model to return a dict")
-        elif guidance_scale is None or guidance_scale <= 1:
-            return pred_v
-        neg_pred_v = self._inference_model(model, x_t, t, neg_cond)
-        if not isinstance(pred_v, dict):
-            # Tensor predictions are blended directly, matching what `sample` accepts.
-            return guidance_scale * pred_v - (guidance_scale - 1) * neg_pred_v
-        for key in pred_v:
-            s = guidance_scale.get(key, 1.0) if per_key else guidance_scale
-            if s > 1:
-                pred_v[key] = s * pred_v[key] - (s - 1) * neg_pred_v[key]
-        return pred_v
-    @torch.no_grad()
-    def sample(self, model, noise, cond, neg_cond, steps=50, shift=1.0,
-               guidance_scale=None, show_progress=False, callback=None):
-        """Integrate from t=1 down to t=0 with `steps` Euler steps and return the final state.
-        Args:
-            model: Callable `model(x_t, t_scaled, cond)` returning the velocity.
-            noise: Initial state (tensor or dict of tensors); left unmodified.
-            cond / neg_cond: Conditioning for the guided and unguided passes.
-            steps: Number of Euler steps.
-            shift: Timestep shift; `t' = shift * t / (1 + (shift - 1) * t)`.
-            guidance_scale: See `_cfg_prediction`.
-            show_progress: Wrap the step loop in a `tqdm` bar.
-            callback: Optional `fn(step, steps)` called after every step.
-        """
-        # Shallow-copy dict input so rebinding entries below never touches the caller's dict.
-        sample = dict(noise) if isinstance(noise, dict) else noise
-        ts = np.linspace(1, 0, steps + 1)
-        t_seq = shift * ts / (1 + (shift - 1) * ts)
-        t_pairs = list(zip(t_seq[:-1], t_seq[1:]))
-        iterator = tqdm(t_pairs, desc="Sampling", total=steps) if show_progress else t_pairs
-        for i, (t, t_prev) in enumerate(iterator):
-            # The model receives clones, so in-place edits inside it cannot corrupt the state.
-            x_t = {k: v.clone() for k, v in sample.items()} if isinstance(sample, dict) else sample.clone()
-            pred_v = self._cfg_prediction(model, x_t, t, cond, neg_cond, guidance_scale)
-            dt = t - t_prev
-            if isinstance(sample, dict):
-                for key in sample:
-                    sample[key] = sample[key] - pred_v[key] * dt
-            else:
-                sample = sample - pred_v * dt
-            if callback is not None:
-                callback(i + 1, steps)
-        return sample
-# ---------------------------------------------------------------------------
-# Component loaders
-# ---------------------------------------------------------------------------
-def _place(m, device, dtype):
-    """Move/cast module `m` when a device or dtype is given, then return it in eval mode."""
-    nothing_requested = device is None and dtype is None
-    if not nothing_requested:
-        m = m.to(device=device, dtype=dtype)
-    return m.eval()
-def load_dinov3(path: str, device=None, dtype=None) -> DinoV3ViT:
-    """Create a `DinoV3ViT`, populate it via `load_safetensors(path)`, and return it placed and in eval mode."""
-    encoder = DinoV3ViT()
-    encoder.load_safetensors(path)
-    return _place(encoder, device=device, dtype=dtype)
-def load_vae_encoder(path: str, device=None, dtype=None) -> Flux2VAEEncoder:
-    """Create a `Flux2VAEEncoder`, populate it via `load_safetensors(path)`, and return it placed and in eval mode."""
-    encoder = Flux2VAEEncoder()
-    encoder.load_safetensors(path)
-    return _place(encoder, device=device, dtype=dtype)
-def load_rmbg(path: str, device=None, dtype=None) -> BiRefNet:
-    """Create a `BiRefNet`, populate it via `load_safetensors(path)`, and return it placed and in eval mode."""
-    matting_net = BiRefNet()
-    matting_net.load_safetensors(path)
-    return _place(matting_net, device=device, dtype=dtype)
-# Constructor keyword arguments for `LatentSeqMMFlowModel` (see `load_flow_model`).
-FLOW_MODEL_ARGS = {
-    "q_token_length": 8192,
-    "in_channels": 16,
-    "cam_channels": 5,
-    "out_channels": 16,
-    "model_channels": 1024,
-    "cond_channels": 1280,
-    "cond2_channels": 128,
-    "num_refiner_blocks": 2,
-    "num_blocks": 24,
-    "num_heads": 16,
-    "mlp_ratio": 4,
-    "qk_rms_norm": True,
-    "share_mod": True,
-    "use_shift_table": True,
-}
-def load_flow_model(path: str, device=None, dtype=None) -> LatentSeqMMFlowModel:
-    """Create a `LatentSeqMMFlowModel` from `FLOW_MODEL_ARGS`, populate it via `load_safetensors(path)`, and return it placed and in eval mode."""
-    flow = LatentSeqMMFlowModel(**FLOW_MODEL_ARGS)
-    flow.load_safetensors(path)
-    return _place(flow, device=device, dtype=dtype)
-# The two argument dicts handed to `OctreeGaussianDecoder` (see `load_decoder`).
-OCTREE_DECODER_ARGS = {
-    "model_channels": 1024,
-    "cond_channels": 16,
-    "num_blocks": 4,
-    "num_heads": 16,
-    "mlp_ratio": 4,
-    "share_mod": True,
-}
-GS_DECODER_ARGS = {
-    "in_channels": 3,
-    "model_channels": 1024,
-    "cond_channels": 16,
-    "attn_mode": "full",
-    "num_blocks": 16,
-    "num_heads": 16,
-    "mlp_ratio": 4,
-    "use_learned_offset_scale": True,
-    "use_per_offset": True,
-    "representation_config": {
-        # Per-attribute multipliers applied to the decoded features in `_build_gaussians`.
-        "lr": {"_xyz": 1.0, "_features_dc": 1.0, "_opacity": 1.0, "_scaling": 1.0, "_rotation": 0.1},
-        "perturb_offset": True,
-        "perturbe_size": 1.5,
-        "offset_scale": 0.05,
-        "num_gaussians": 32,
-        "filter_kernel_size_3d": 0.0009,
-        "scaling_bias": 0.004,
-        "opacity_bias": 0.1,
-        "scaling_activation": "softplus",
-    },
-}
-def load_decoder(path: str, device=None, dtype=None) -> OctreeGaussianDecoder:
-    """Create an `OctreeGaussianDecoder` from the two argument dicts, populate it via `load_safetensors(path)`, and return it placed and in eval mode."""
-    decoder = OctreeGaussianDecoder(OCTREE_DECODER_ARGS, GS_DECODER_ARGS)
-    decoder.load_safetensors(path)
-    return _place(decoder, device=device, dtype=dtype)
-# ---------------------------------------------------------------------------
-# Pipeline stages
-# ---------------------------------------------------------------------------
-# Pixel size used by `preprocess_image`: the shorter input side is first resized
-# to this length, and the final composite is a square of this side length.
-_CANVAS_SIZE = 1024
-def _image_to_pil(image) -> Image.Image:
-    """Coerce a PIL image, a path-like / str / bytes, or a tensor into a PIL image.
-    Tensors are clamped to `[0, 1]`, scaled by 255 and cast to uint8; a leading
-    batch dimension is accepted only when it has size 1. A trailing dimension of
-    4 selects RGBA, anything else is treated as RGB.
-    Raises:
-        TypeError: For any other input type.
-    """
-    if isinstance(image, Image.Image):
-        return image
-    looks_like_path = isinstance(image, (str, bytes)) or hasattr(image, "__fspath__")
-    if looks_like_path:
-        return Image.open(image)
-    if not isinstance(image, torch.Tensor):
-        raise TypeError(f"unsupported image type: {type(image)}")
-    pixels = image.detach().cpu()
-    if pixels.ndim == 4:
-        batch = pixels.shape[0]
-        assert batch == 1, (
-            f"batched image input is not supported (got B={batch}); "
-            "pass one image at a time"
-        )
-        pixels = pixels[0]
-    arr = (pixels.clamp(0, 1) * 255).to(torch.uint8).numpy()
-    return Image.fromarray(arr, mode="RGBA" if arr.shape[-1] == 4 else "RGB")
-def preprocess_image(image, rmbg: BiRefNet, erode_radius: int = 1) -> Image.Image:
-    """Isolate the foreground, center it on a square canvas, and composite it on black.
-    Steps: resize so the shorter side equals `_CANVAS_SIZE`; call
-    `rmbg.remove_background` unless the image is RGBA with at least one
-    non-opaque pixel; optionally erode the alpha channel; crop a square around
-    the non-zero alpha region with 20% extra extent; resize to the canvas size;
-    paste onto a black RGB background using alpha as the mask.
-    Args:
-        image: Anything `_image_to_pil` accepts.
-        rmbg: Background-removal model providing `remove_background`.
-        erode_radius: Alpha erosion radius in pixels (`MinFilter` of size
-            `2 * erode_radius + 1`); `0` disables erosion.
-    Returns:
-        An RGB image of size `_CANVAS_SIZE` x `_CANVAS_SIZE`.
-    Raises:
-        ValueError: If no pixel has non-zero alpha after matting / erosion.
-    """
-    image = _image_to_pil(image)
-    size = _CANVAS_SIZE
-    w, h = image.size
-    s = size / min(w, h)
-    image = image.resize((max(1, int(round(w * s))), max(1, int(round(h * s)))), Image.LANCZOS)
-    has_real_alpha = (image.mode == "RGBA"
-                      and np.array(image.getchannel(3), dtype=np.int32).min() < 255)
-    if not has_real_alpha:
-        image = rmbg.remove_background(image.convert("RGB"))
-    if erode_radius > 0:
-        image.putalpha(image.getchannel(3).filter(ImageFilter.MinFilter(2 * erode_radius + 1)))
-    alpha = np.array(image.getchannel(3))
-    ys, xs = np.nonzero(alpha)
-    if xs.size == 0:
-        # Without this check the `.min()` calls below fail with numpy's opaque
-        # "zero-size array to reduction operation" ValueError.
-        raise ValueError(
-            "no foreground pixels found after background removal / erosion; "
-            "cannot compute a crop box"
-        )
-    bbox = [xs.min(), ys.min(), xs.max(), ys.max()]
-    cx, cy = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
-    # Half the longer bbox side, enlarged by 20% so the subject keeps a margin.
-    half = max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 2 * 1.2
-    image = image.crop([int(cx - half), int(cy - half), int(cx + half), int(cy + half)])
-    image = image.resize((size, size), Image.LANCZOS)
-    bg = Image.new("RGB", (size, size), (0, 0, 0))
-    bg.paste(image, mask=image.split()[3])
-    return bg
-# Per-channel normalization applied in `encode_image` to the [0, 1] image tensor
-# before it is fed to DinoV3 (the values are the standard ImageNet mean / std).
-_DINOV3_NORMALIZE = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-@torch.no_grad()
-def encode_image(image: Image.Image, dinov3: DinoV3ViT, vae_encoder: Flux2VAEEncoder,
-                 generator: torch.Generator = None) -> dict:
-    """Encode one prepared image into the two conditioning feature sequences.
-    Returns a dict with `'feature1'` (layer-normalized float DinoV3 output) and
-    `'feature2'` (VAE encoding with 5 zero tokens prepended). `generator` is
-    forwarded to the non-deterministic VAE `encode` call.
-    """
-    dino_param = next(dinov3.parameters())
-    vae_param = next(vae_encoder.parameters())
-    pixels = transforms.ToTensor()(image).unsqueeze(0).to(device=dino_param.device, dtype=torch.float32)
-    # DinoV3 branch: normalized input, then layer norm over the channel dimension in float32.
-    dino_tokens = dinov3(pixel_values=_DINOV3_NORMALIZE(pixels).to(dino_param.dtype))
-    dino_tokens = F.layer_norm(dino_tokens.float(), dino_tokens.shape[-1:])
-    # VAE branch: input rescaled from [0, 1] to [-1, 1].
-    vae_tokens = vae_encoder.encode(pixels.to(vae_param.dtype) * 2 - 1,
-                                    deterministic=False, generator=generator)
-    # pad 5 zero tokens so feature2's token length matches feature1's (cls + 4 registers + patches)
-    padding = vae_tokens.new_zeros(vae_tokens.shape[0], 5, vae_tokens.shape[2])
-    vae_tokens = torch.cat([padding, vae_tokens], dim=1)
-    return {'feature1': dino_tokens, 'feature2': vae_tokens}
-@torch.no_grad()
-def sample_latent(flow_model: LatentSeqMMFlowModel, cond: dict,
-                  steps: int = 50, guidance_scale: float = 7.0, shift: float = 3.0,
-                  generator: torch.Generator = None,
-                  show_progress: bool = False, callback=None) -> dict:
-    """Draw initial noise and integrate the flow model with `FlowEulerCfgSampler`.
-    The negative conditioning is an all-zeros copy of `cond`. The returned dict
-    has a `'latent'` entry and, when the model defines `cam_channels`, a
-    `'camera'` entry.
-    """
-    device = flow_model.device
-    # Draw order (latent first, then camera) determines the values for a given generator.
-    latent_shape = (1, flow_model.q_token_length, flow_model.in_channels)
-    noise = {'latent': torch.randn(*latent_shape, device=device, generator=generator)}
-    if flow_model.cam_channels is not None:
-        camera_shape = (1, 1, flow_model.cam_channels)
-        noise['camera'] = torch.randn(*camera_shape, device=device, generator=generator)
-    neg_cond = {name: torch.zeros_like(feat) for name, feat in cond.items()}
-    return FlowEulerCfgSampler().sample(
-        flow_model, noise, cond=cond, neg_cond=neg_cond,
-        steps=steps, guidance_scale=guidance_scale, shift=shift,
-        show_progress=show_progress, callback=callback,
-    )
-# ---------------------------------------------------------------------------
-# Pipeline
-# ---------------------------------------------------------------------------
-class TripoSplatPipeline:
-    """Image-to-Gaussian-splat pipeline: preprocess, encode, sample a latent, decode."""
-    # Inclusive bounds accepted for a requested Gaussian count.
-    _NUM_GAUSSIANS_MIN = 32768
-    _NUM_GAUSSIANS_MAX = 262144
-    def __init__(self, ckpt_path: str, decoder_path: str, dinov3_path: str,
-                 flux2_vae_encoder_path: str, rmbg_path: str, device: str = "cuda"):
-        """Load all five components onto `device` (encoders in bfloat16, the rest in float16)."""
-        self._device = torch.device(device)
-        dev = self._device
-        self.dinov3 = load_dinov3(dinov3_path, device=dev, dtype=torch.bfloat16)
-        self.vae_encoder = load_vae_encoder(flux2_vae_encoder_path, device=dev, dtype=torch.bfloat16)
-        self.rmbg = load_rmbg(rmbg_path, device=dev, dtype=torch.float16)
-        self.flow_model = load_flow_model(ckpt_path, device=dev, dtype=torch.float16)
-        self.decoder = load_decoder(decoder_path, device=dev, dtype=torch.float16)
-    def preprocess_image(self, image, erode_radius: int = 1) -> Image.Image:
-        """Run the module-level `preprocess_image` with this pipeline's background remover."""
-        return preprocess_image(image, self.rmbg, erode_radius=erode_radius)
-    def encode_image(self, image: Image.Image, generator: torch.Generator = None) -> dict:
-        """Run the module-level `encode_image` with this pipeline's encoders."""
-        return encode_image(image, self.dinov3, self.vae_encoder, generator=generator)
-    def sample_latent(self, cond: dict, steps: int = 50, guidance_scale: float = 7.0,
-                      shift: float = 3.0, generator: torch.Generator = None,
-                      show_progress: bool = False, callback=None) -> dict:
-        """Run the module-level `sample_latent` with this pipeline's flow model."""
-        return sample_latent(
-            self.flow_model, cond,
-            steps=steps, guidance_scale=guidance_scale, shift=shift, generator=generator,
-            show_progress=show_progress, callback=callback,
-        )
-    def decode_latent(self, latent: torch.Tensor, num_gaussians: int = 262144):
-        """Decode a sampled latent with `self.decoder.decode`."""
-        return self.decoder.decode(latent, num_gaussians=num_gaussians)
-    def _validate_num_gaussians(self, n: int) -> int:
-        """Range-check `n` and round it to a multiple of the decoder's `gaussians_per_point`."""
-        lo, hi = self._NUM_GAUSSIANS_MIN, self._NUM_GAUSSIANS_MAX
-        assert lo <= n <= hi, (
-            f"num_gaussians must be in [{lo}, {hi}], got {n}"
-        )
-        gpp = self.decoder.gaussians_per_point
-        if n % gpp != 0:
-            rounded = round(n / gpp) * gpp
-            print(f"[TripoSplatPipeline] num_gaussians={n} is not a multiple of {gpp}; rounding to {rounded}")
-            return rounded
-        return n
-    @torch.no_grad()
-    def run(self, image, seed: int = 42, steps: int = 20, guidance_scale: float = 3.0,
-            shift: float = 3.0, num_gaussians=262144, erode_radius: int = 1,
-            show_progress: bool = False, callback=None):
-        """Run the full pipeline on one image.
-        Args:
-            image: File path, `PIL.Image`, or `torch.Tensor` (`[1,H,W,C]` or
-                `[H,W,C]`, float in `[0, 1]`, optional alpha as a 4th channel).
-            seed: Seeds the generator shared by the VAE encoder's stochastic
-                sampling and the initial flow-matching noise. Same seed → same
-                output.
-            steps: Number of Euler steps in the flow-matching sampler; runtime
-                grows linearly with it. Recommend: 10~20.
-            guidance_scale: Classifier-free-guidance strength (diffusers
-                convention); `≤ 1.0` disables CFG. Higher → more detail and
-                stronger adherence to the input image; too high can cause
-                color oversaturation. Recommend: 3.0.
-            shift: Timestep schedule shift. `1.0` is a uniform schedule;
-                `>1.0` allocates more steps to the early/high-noise end.
-                Recommend: 3.0.
-            num_gaussians: Target splat count, or a `list` / `tuple` of counts.
-                Each count must lie in `[32768, 262144]` and is rounded to the
-                nearest multiple of the decoder's `gaussians_per_point`.
-            erode_radius: Pixel radius for eroding the alpha matte before
-                compositing on black. `0` disables; `1` is a 3×3 minimum
-                filter. Recommend: 1.
-            show_progress: Show a `tqdm` progress bar over sampler steps.
-            callback: Optional `fn(step, total)` invoked after each sampler
-                step, e.g. to drive an external progress UI.
-        Returns:
-            `(gaussian, prepared_image)` for an `int` `num_gaussians`, or
-            `(list_of_gaussians, prepared_image)` for a `list` / `tuple`.
-            `prepared_image` is the RGB composite the encoders actually saw.
-        """
-        wants_list = isinstance(num_gaussians, (list, tuple))
-        requested = num_gaussians if wants_list else [num_gaussians]
-        counts = [self._validate_num_gaussians(n) for n in requested]
-        # One generator feeds both the encoder and the sampler, so the seed fixes the whole run.
-        gen = torch.Generator(device=self._device).manual_seed(seed)
-        prepared = self.preprocess_image(image, erode_radius=erode_radius)
-        cond = self.encode_image(prepared, generator=gen)
-        out = self.sample_latent(cond, steps=steps, guidance_scale=guidance_scale, shift=shift,
-                                 generator=gen, show_progress=show_progress, callback=callback)
-        # The same latent is decoded once per requested count.
-        gaussians = [self.decode_latent(out['latent'], num_gaussians=n) for n in counts]
-        result = gaussians if wants_list else gaussians[0]
-        return result, prepared