Spaces:

ritianyu
/

InfiniDepth

Running on Zero

App Files Files Community

ritianyu committed on Mar 17

Commit

e4b4a0d

1 Parent(s): 3b93851

update

Browse files

Files changed (9) hide show

InfiniDepth/gs/__init__.py +0 -11
InfiniDepth/gs/adapter.py +0 -90
InfiniDepth/gs/ply.py +0 -232
InfiniDepth/gs/predictor.py +0 -139
InfiniDepth/gs/projection.py +0 -53
InfiniDepth/gs/types.py +0 -14
InfiniDepth/utils/depth_video_utils.py +0 -250
InfiniDepth/utils/gs_utils.py +0 -289
InfiniDepth/utils/inference_utils.py +1 -66

InfiniDepth/gs/__init__.py DELETED Viewed

@@ -1,11 +0,0 @@
-"""Lightweight Gaussian Splatting inference utilities."""
-from .types import Gaussians
-from .predictor import GSPixelAlignPredictor
-from .ply import export_ply
-# Names re-exported as the public API of this package.
-__all__ = [
-    "Gaussians",
-    "GSPixelAlignPredictor",
-    "export_ply",
-]

InfiniDepth/gs/adapter.py DELETED Viewed

@@ -1,90 +0,0 @@
-from dataclasses import dataclass
-import torch
-import torch.nn.functional as F
-from torch import nn
-from .projection import get_world_rays
-from .types import Gaussians
-def rgb_to_sh(rgb: torch.Tensor) -> torch.Tensor:
-    """Map RGB values to degree-0 spherical-harmonic (DC) coefficients.
-    The colour is shifted by 0.5 and divided by the constant SH basis value.
-    """
-    sh_dc_basis = 0.28209479177387814  # 1 / (2 * sqrt(pi))
-    centered = rgb - 0.5
-    return centered / sh_dc_basis
-@dataclass
-class GaussianAdapterCfg:
-    """Settings read by GaussianAdapter."""
-    # Lower clamp bound for the softplus-activated Gaussian scales.
-    gaussian_scale_min: float = 1e-10
-    # Upper clamp bound for the softplus-activated Gaussian scales.
-    gaussian_scale_max: float = 5.0
-    # Spherical-harmonic degree; the adapter uses (sh_degree + 1) ** 2 coefficients per colour channel.
-    sh_degree: int = 2
-class GaussianAdapter(nn.Module):
-    """Convert raw per-point network outputs into world-space Gaussian parameters."""
-    def __init__(self, cfg: GaussianAdapterCfg) -> None:
-        super().__init__()
-        self.cfg = cfg
-        # Non-persistent buffer: it moves with the module but is not stored in the state dict.
-        self.register_buffer("sh_mask", torch.ones((self.d_sh,), dtype=torch.float32), persistent=False)
-        # Damp the higher SH bands: band `degree` is scaled by 0.1 * 0.25 ** degree.
-        for degree in range(1, self.cfg.sh_degree + 1):
-            band_start = degree**2
-            band_end = (degree + 1) ** 2
-            self.sh_mask[band_start:band_end] = 0.1 * (0.25**degree)
-    @property
-    def d_sh(self) -> int:
-        """Number of SH coefficients per colour channel."""
-        order = self.cfg.sh_degree + 1
-        return order * order
-    @property
-    def d_in(self) -> int:
-        """Width of the raw per-point vector: 3 scales + 4 quaternion entries + 3 * d_sh SH values."""
-        n_scale, n_quat = 3, 4
-        return n_scale + n_quat + 3 * self.d_sh
-    def forward(
-        self,
-        image: torch.Tensor,
-        extrinsics: torch.Tensor,
-        intrinsics: torch.Tensor,
-        coordinates_xy: torch.Tensor,
-        depths: torch.Tensor,
-        opacities: torch.Tensor,
-        raw_gaussians: torch.Tensor,
-    ) -> Gaussians:
-        """Assemble world-space Gaussians from raw per-point parameters.
-        image: [B, 3, H, W]
-        extrinsics: [B, 4, 4] camera-to-world
-        intrinsics: [B, 3, 3]
-        coordinates_xy: [B, N, 2] pixel-space (x, y)
-        depths: [B, N]
-        opacities: [B, N]
-        raw_gaussians: [B, N, 7 + 3*d_sh]
-        """
-        batch, _, height, width = image.shape
-        raw_scales, raw_quats, raw_sh = raw_gaussians.split([3, 4, 3 * self.d_sh], dim=-1)
-        # Softplus keeps scales positive; the -4 shift biases them towards small values.
-        scales = F.softplus(raw_scales - 4.0).clamp(
-            min=self.cfg.gaussian_scale_min,
-            max=self.cfg.gaussian_scale_max,
-        )
-        quat_norm = torch.norm(raw_quats, dim=-1, keepdim=True)
-        rotations = raw_quats / (quat_norm + 1e-8)
-        harmonics = raw_sh.view(batch, -1, 3, self.d_sh) * self.sh_mask.view(1, 1, 1, -1)
-        # Seed the DC term with the image colour sampled at each Gaussian centre.
-        # Pixel coordinates are mapped to grid_sample's [-1, 1] range.
-        norm_x = (coordinates_xy[..., 0] / max(float(width), 1.0)) * 2.0 - 1.0
-        norm_y = (coordinates_xy[..., 1] / max(float(height), 1.0)) * 2.0 - 1.0
-        sample_grid = torch.stack([norm_x, norm_y], dim=-1).unsqueeze(2)  # [B, N, 1, 2]
-        sampled_rgb = F.grid_sample(image, sample_grid, mode="bilinear", align_corners=False)
-        sampled_rgb = sampled_rgb.squeeze(-1).permute(0, 2, 1)  # [B, N, 3]
-        harmonics[..., 0] = harmonics[..., 0] + rgb_to_sh(sampled_rgb)
-        # Place each Gaussian along its camera ray at the given depth.
-        ray_origins, ray_directions = get_world_rays(coordinates_xy, extrinsics, intrinsics)
-        means = ray_origins + ray_directions * depths[..., None]
-        return Gaussians(
-            means=means,
-            harmonics=harmonics,
-            opacities=opacities,
-            scales=scales,
-            rotations=rotations,
-            covariances=None,
-        )

InfiniDepth/gs/ply.py DELETED Viewed

@@ -1,232 +0,0 @@
-from pathlib import Path
-import numpy as np
-import torch
-from jaxtyping import Float
-from plyfile import PlyData, PlyElement
-from torch import Tensor
-def _construct_attributes(d_sh: int) -> list[str]:
-    """List the PLY vertex property names, in storage order, for `d_sh` SH coefficients per channel."""
-    position = ["x", "y", "z"]
-    normal = ["nx", "ny", "nz"]
-    sh_dc = ["f_dc_0", "f_dc_1", "f_dc_2"]
-    # Each coefficient beyond the DC term contributes one value per colour channel.
-    sh_rest = [f"f_rest_{i}" for i in range(3 * max(d_sh - 1, 0))]
-    scale = ["scale_0", "scale_1", "scale_2"]
-    rotation = ["rot_0", "rot_1", "rot_2", "rot_3"]
-    return [*position, *normal, *sh_dc, *sh_rest, "opacity", *scale, *rotation]
-def export_ply(
-    means: Float[Tensor, "gaussian 3"],
-    harmonics: Float[Tensor, "gaussian 3 d_sh"],
-    opacities: Float[Tensor, " gaussian"],
-    path: str | Path,
-    scales: Float[Tensor, "gaussian 3"] | None = None,
-    rotations: Float[Tensor, "gaussian 4"] | None = None,
-    covariances: Float[Tensor, "gaussian 3 3"] | None = None,  # Use covariances directly
-    shift_and_scale: bool = True,
-    save_sh_dc_only: bool = True,  # When True (the default), only the DC SH term is written; f_rest_* are omitted
-    center_method: str = "mean",  # "mean", "median", or "bbox_center"
-    apply_coordinate_transform: bool = True,  # Apply x90° rotation for viewer compatibility
-    focal_length_px: float | tuple[float, float] | None = None,
-    image_shape: tuple[int, int] | None = None,  # (height, width)
-    extrinsic_matrix: np.ndarray | torch.Tensor | None = None,
-    color_space_index: int | None = None,
-):
-    """Write a set of 3D Gaussians to a PLY file.
-    Either `covariances` or both `scales` and `rotations` must be supplied. When
-    `covariances` is supplied, scales and rotations are re-derived from its
-    eigendecomposition and any passed `scales`/`rotations` are overwritten.
-    Args:
-        means: Gaussian centres.
-        harmonics: SH coefficients; index 0 of the last axis is written as f_dc_*.
-        opacities: Opacities; clipped to [1e-6, 1 - 1e-6] and stored as logits.
-        path: Output file; missing parent directories are created.
-        scales: Per-axis scales, stored as natural logarithms.
-        rotations: Quaternions, indexed as (w, x, y, z) by the coordinate transform.
-        covariances: Optional covariance matrices used instead of scales/rotations.
-        shift_and_scale: When True, subtract a centre (see `center_method`) from
-            the means. No rescaling is performed in this function.
-        save_sh_dc_only: When True, omit the f_rest_* properties.
-        center_method: "mean", "median" or "bbox_center".
-        apply_coordinate_transform: When True, rotate means and rotations by 90
-            degrees about the X axis.
-        focal_length_px: Scalar or (fx, fy). The camera metadata elements are
-            written only when both this and `image_shape` are given.
-        image_shape: (height, width) of the source image.
-        extrinsic_matrix: 4x4 matrix stored in the "extrinsic" element; identity when None.
-        color_space_index: Value stored in the "color_space" element; 1 when None.
-    Raises:
-        ValueError: If neither covariances nor scales+rotations are given, if
-            `center_method` is unknown, or if `extrinsic_matrix` is not 4x4.
-    """
-    path = Path(path)
-    # Check input consistency
-    if covariances is None and (scales is None or rotations is None):
-        raise ValueError("Either provide covariances or both scales and rotations")
-    # Fast covariance to scale/rotation conversion using batch operations
-    if covariances is not None:
-        # Batch eigenvalue decomposition - much faster than individual decompositions
-        eigenvalues, eigenvectors = torch.linalg.eigh(covariances)
-        # Scales are the square roots of the eigenvalues, floored to stay positive.
-        scales = torch.sqrt(torch.clamp(eigenvalues, min=1e-8))
-        # Fast batch conversion from rotation matrices to quaternions
-        # Using direct mathematical conversion instead of scipy loops
-        def rotation_matrix_to_quaternion_batch(R):
-            """Fast batch conversion from rotation matrices to quaternions"""
-            # The four cases branch on the trace / largest diagonal entry so that
-            # the square root argument stays well away from zero.
-            trace = R[..., 0, 0] + R[..., 1, 1] + R[..., 2, 2]
-            # Pre-allocate quaternion tensor
-            quat = torch.zeros(R.shape[0], 4, dtype=R.dtype, device=R.device)
-            # Case 1: trace > 0
-            mask1 = trace > 0
-            if mask1.any():
-                s = torch.sqrt(trace[mask1] + 1.0) * 2  # s = 4 * qw
-                quat[mask1, 0] = 0.25 * s  # qw
-                quat[mask1, 1] = (R[mask1, 2, 1] - R[mask1, 1, 2]) / s  # qx
-                quat[mask1, 2] = (R[mask1, 0, 2] - R[mask1, 2, 0]) / s  # qy
-                quat[mask1, 3] = (R[mask1, 1, 0] - R[mask1, 0, 1]) / s  # qz
-            # Case 2: R[0,0] > R[1,1] and R[0,0] > R[2,2]
-            mask2 = ~mask1 & (R[..., 0, 0] > R[..., 1, 1]) & (R[..., 0, 0] > R[..., 2, 2])
-            if mask2.any():
-                s = torch.sqrt(1.0 + R[mask2, 0, 0] - R[mask2, 1, 1] - R[mask2, 2, 2]) * 2
-                quat[mask2, 0] = (R[mask2, 2, 1] - R[mask2, 1, 2]) / s  # qw
-                quat[mask2, 1] = 0.25 * s  # qx
-                quat[mask2, 2] = (R[mask2, 0, 1] + R[mask2, 1, 0]) / s  # qy
-                quat[mask2, 3] = (R[mask2, 0, 2] + R[mask2, 2, 0]) / s  # qz
-            # Case 3: R[1,1] > R[2,2]
-            mask3 = ~mask1 & ~mask2 & (R[..., 1, 1] > R[..., 2, 2])
-            if mask3.any():
-                s = torch.sqrt(1.0 + R[mask3, 1, 1] - R[mask3, 0, 0] - R[mask3, 2, 2]) * 2
-                quat[mask3, 0] = (R[mask3, 0, 2] - R[mask3, 2, 0]) / s  # qw
-                quat[mask3, 1] = (R[mask3, 0, 1] + R[mask3, 1, 0]) / s  # qx
-                quat[mask3, 2] = 0.25 * s  # qy
-                quat[mask3, 3] = (R[mask3, 1, 2] + R[mask3, 2, 1]) / s  # qz
-            # Case 4: else
-            mask4 = ~mask1 & ~mask2 & ~mask3
-            if mask4.any():
-                s = torch.sqrt(1.0 + R[mask4, 2, 2] - R[mask4, 0, 0] - R[mask4, 1, 1]) * 2
-                quat[mask4, 0] = (R[mask4, 1, 0] - R[mask4, 0, 1]) / s  # qw
-                quat[mask4, 1] = (R[mask4, 0, 2] + R[mask4, 2, 0]) / s  # qx
-                quat[mask4, 2] = (R[mask4, 1, 2] + R[mask4, 2, 1]) / s  # qy
-                quat[mask4, 3] = 0.25 * s  # qz
-            return quat
-        # Ensure proper rotation matrices
-        # Negating a 3x3 matrix flips the sign of its determinant, turning a reflection into a rotation.
-        det = torch.det(eigenvectors)
-        eigenvectors = torch.where(det.unsqueeze(-1).unsqueeze(-1) < 0,
-                                 -eigenvectors, eigenvectors)
-        # Fast batch conversion
-        rotations = rotation_matrix_to_quaternion_batch(eigenvectors)
-    # Apply centering - vectorized operations
-    if shift_and_scale:
-        if center_method == "mean":
-            center = means.mean(dim=0)
-        elif center_method == "median":
-            center = means.median(dim=0).values
-        elif center_method == "bbox_center":
-            center = (means.min(dim=0).values + means.max(dim=0).values) / 2
-        else:
-            raise ValueError(f"Unknown center_method: {center_method}")
-        means = means - center
-    # Fast coordinate transformation using batch operations
-    # NOTE(review): only means and rotations are transformed here; the SH
-    # coefficients in `harmonics` are left untouched.
-    if apply_coordinate_transform:
-        # X-axis 90° rotation matrix
-        rot_x = torch.tensor([
-            [1, 0, 0],
-            [0, 0, -1],
-            [0, 1, 0]
-        ], dtype=means.dtype, device=means.device)
-        # Apply to positions - batch matrix multiplication
-        means = means @ rot_x.T
-        # Apply to rotations - batch quaternion operations
-        transform_quat = torch.tensor([0.7071068, 0.7071068, 0.0, 0.0],
-                                    dtype=rotations.dtype, device=rotations.device)  # 90° around X
-        # Batch quaternion multiplication
-        # Hamilton product transform_quat * rotations, with quaternions laid out as (w, x, y, z).
-        w1, x1, y1, z1 = transform_quat[0], transform_quat[1], transform_quat[2], transform_quat[3]
-        w2, x2, y2, z2 = rotations[:, 0], rotations[:, 1], rotations[:, 2], rotations[:, 3]
-        rotations = torch.stack([
-            w1*w2 - x1*x2 - y1*y2 - z1*z2,  # w
-            w1*x2 + x1*w2 + y1*z2 - z1*y2,  # x
-            w1*y2 - x1*z2 + y1*w2 + z1*x2,  # y
-            w1*z2 + x1*y2 - y1*x2 + z1*w2   # z
-        ], dim=1)
-    # Convert to numpy for PLY writing - single conversion
-    means_np = means.detach().cpu().numpy()
-    scales_np = scales.detach().cpu().numpy()
-    rotations_np = rotations.detach().cpu().numpy()
-    opacities_np = opacities.detach().cpu().numpy()
-    harmonics_np = harmonics.detach().cpu().numpy()
-    # Process harmonics
-    # f_dc is the first SH coefficient of each channel; f_rest flattens the
-    # remaining coefficients channel by channel.
-    f_dc = harmonics_np[..., 0]
-    f_rest = harmonics_np[..., 1:].reshape(harmonics_np.shape[0], -1)
-    d_sh = harmonics_np.shape[-1]
-    dtype_full = [
-        (attribute, "f4")
-        for attribute in _construct_attributes(1 if save_sh_dc_only else d_sh)
-    ]
-    elements = np.empty(means_np.shape[0], dtype=dtype_full)
-    # Build attributes list
-    # The column order must match the property order from _construct_attributes.
-    attributes = [
-        means_np,
-        np.zeros_like(means_np),  # normals
-        f_dc,
-    ]
-    if not save_sh_dc_only:
-        attributes.append(f_rest)
-    # Apply inverse sigmoid to opacity for storage (viewer will apply sigmoid when loading)
-    # logit(opacity) = log(opacity / (1 - opacity))
-    opacities_clamped = np.clip(opacities_np, 1e-6, 1 - 1e-6)  # Clamp to avoid log(0) or log(inf)
-    opacities_logit = np.log(opacities_clamped / (1 - opacities_clamped))
-    attributes.extend([
-        opacities_logit.reshape(-1, 1),
-        np.log(scales_np),
-        rotations_np
-    ])
-    attributes = np.concatenate(attributes, axis=1)
-    # One tuple per Gaussian fills the structured vertex array row by row.
-    elements[:] = list(map(tuple, attributes))
-    path.parent.mkdir(exist_ok=True, parents=True)
-    ply_elements = [PlyElement.describe(elements, "vertex")]
-    # Optional camera metadata, appended as extra PLY elements.
-    if focal_length_px is not None and image_shape is not None:
-        image_height, image_width = image_shape
-        if isinstance(focal_length_px, tuple):
-            fx, fy = float(focal_length_px[0]), float(focal_length_px[1])
-        else:
-            fx = fy = float(focal_length_px)
-        # Image size is stored as (width, height).
-        dtype_image_size = [("image_size", "u4")]
-        image_size_array = np.empty(2, dtype=dtype_image_size)
-        image_size_array[:] = np.array([image_width, image_height], dtype=np.uint32)
-        ply_elements.append(PlyElement.describe(image_size_array, "image_size"))
-        # Row-major 3x3 intrinsics with the principal point placed at the image centre.
-        dtype_intrinsic = [("intrinsic", "f4")]
-        intrinsic_array = np.empty(9, dtype=dtype_intrinsic)
-        intrinsic = np.array(
-            [
-                fx,
-                0.0,
-                image_width * 0.5,
-                0.0,
-                fy,
-                image_height * 0.5,
-                0.0,
-                0.0,
-                1.0,
-            ],
-            dtype=np.float32,
-        )
-        intrinsic_array[:] = intrinsic.flatten()
-        ply_elements.append(PlyElement.describe(intrinsic_array, "intrinsic"))
-        # Row-major 4x4 extrinsics; identity when no matrix was supplied.
-        dtype_extrinsic = [("extrinsic", "f4")]
-        extrinsic_array = np.empty(16, dtype=dtype_extrinsic)
-        if extrinsic_matrix is None:
-            extrinsic_np = np.eye(4, dtype=np.float32)
-        elif torch.is_tensor(extrinsic_matrix):
-            extrinsic_np = extrinsic_matrix.detach().cpu().numpy().astype(np.float32)
-        else:
-            extrinsic_np = np.asarray(extrinsic_matrix, dtype=np.float32)
-        if extrinsic_np.shape != (4, 4):
-            raise ValueError(f"extrinsic_matrix must have shape (4,4), got {extrinsic_np.shape}")
-        extrinsic_array[:] = extrinsic_np.flatten()
-        ply_elements.append(PlyElement.describe(extrinsic_array, "extrinsic"))
-        # Colour-space tag; 1 is written when the caller gives none.
-        dtype_color_space = [("color_space", "u1")]
-        color_space_array = np.empty(1, dtype=dtype_color_space)
-        color_space_array[:] = np.array([1 if color_space_index is None else color_space_index], dtype=np.uint8)
-        ply_elements.append(PlyElement.describe(color_space_array, "color_space"))
-    PlyData(ply_elements).write(path)

InfiniDepth/gs/predictor.py DELETED Viewed

@@ -1,139 +0,0 @@
-from dataclasses import dataclass
-import torch
-from torch import nn
-from .adapter import GaussianAdapter, GaussianAdapterCfg
-from .projection import sample_image_grid
-from .types import Gaussians
-@dataclass
-class GSPredictorCfg:
-    """Hyper-parameters for GSPixelAlignPredictor."""
-    # Output channels of the RGB encoder.
-    rgb_feature_dim: int = 64
-    # Output channels of the depth encoder.
-    depth_feature_dim: int = 32
-    # Output channels of the 1x1 DINO projector.
-    dino_reduced_dim: int = 128
-    # Channel width of the Gaussian regressor.
-    gaussian_regressor_channels: int = 64
-    # NOTE(review): not read by the predictor code visible in this file — confirm whether it is still needed.
-    num_surfaces: int = 1
-    # The three fields below are forwarded to GaussianAdapterCfg.
-    gaussian_scale_min: float = 1e-10
-    gaussian_scale_max: float = 5.0
-    sh_degree: int = 2
-class GSPixelAlignPredictor(nn.Module):
-    """Regress one pixel-aligned Gaussian per pixel from an image, its depth map and DINO tokens."""
-    def __init__(self, dino_feature_dim: int = 1024, cfg: GSPredictorCfg | None = None) -> None:
-        super().__init__()
-        self.cfg = cfg or GSPredictorCfg()
-        cfg = self.cfg
-        rgb_dim = cfg.rgb_feature_dim
-        depth_dim = cfg.depth_feature_dim
-        dino_dim = cfg.dino_reduced_dim
-        hidden = cfg.gaussian_regressor_channels
-        # Two-layer 3x3 encoders; stride 1 and padding 1 keep the spatial size.
-        self.rgb_encoder = nn.Sequential(
-            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
-            nn.GELU(),
-            nn.Conv2d(32, rgb_dim, kernel_size=3, stride=1, padding=1),
-            nn.GELU(),
-        )
-        self.depth_encoder = nn.Sequential(
-            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
-            nn.GELU(),
-            nn.Conv2d(16, depth_dim, kernel_size=3, stride=1, padding=1),
-            nn.GELU(),
-        )
-        # 1x1 convolutions that shrink the DINO channel dimension.
-        self.dino_projector = nn.Sequential(
-            nn.Conv2d(dino_feature_dim, 256, kernel_size=1),
-            nn.GELU(),
-            nn.Conv2d(256, dino_dim, kernel_size=1),
-        )
-        self.gaussian_regressor = nn.Sequential(
-            nn.Conv2d(rgb_dim + depth_dim + dino_dim, hidden, kernel_size=3, stride=1, padding=1),
-            nn.GELU(),
-            nn.Conv2d(hidden, hidden, kernel_size=3, stride=1, padding=1),
-        )
-        adapter_cfg = GaussianAdapterCfg(
-            gaussian_scale_min=cfg.gaussian_scale_min,
-            gaussian_scale_max=cfg.gaussian_scale_max,
-            sh_degree=cfg.sh_degree,
-        )
-        self.gaussian_adapter = GaussianAdapter(adapter_cfg)
-        # Head output layout per pixel: 1 opacity logit, 2 xy-offset logits, then the adapter's raw parameters.
-        n_head_out = self.gaussian_adapter.d_in + 2 + 1
-        self.gaussian_head = nn.Sequential(
-            nn.Conv2d(hidden + rgb_dim + dino_dim, n_head_out, kernel_size=3, stride=1, padding=1, padding_mode="replicate"),
-            nn.GELU(),
-            nn.Conv2d(n_head_out, n_head_out, kernel_size=3, stride=1, padding=1, padding_mode="replicate"),
-        )
-    @torch.no_grad()
-    def load_from_infinisplat_checkpoint(self, checkpoint_path: str) -> None:
-        """Copy weights stored under the `encoder.` prefix into this module.
-        Only entries whose prefixed key exists in the checkpoint and whose shape
-        matches are loaded; everything else keeps its current value.
-        """
-        checkpoint = torch.load(checkpoint_path, map_location="cpu")
-        source_sd = checkpoint.get("state_dict", checkpoint)
-        matched = {
-            name: source_sd[f"encoder.{name}"]
-            for name, current in self.state_dict().items()
-            if f"encoder.{name}" in source_sd and source_sd[f"encoder.{name}"].shape == current.shape
-        }
-        self.load_state_dict(matched, strict=False)
-    def _tokens_to_feature_map(self, dino_tokens: torch.Tensor, h: int, w: int) -> torch.Tensor:
-        """Reshape the trailing patch tokens into a grid and upsample it to (h, w).
-        The grid is (h // 16) x (w // 16); any leading tokens beyond that count are dropped.
-        """
-        batch, n_tokens, channels = dino_tokens.shape
-        grid_h, grid_w = h // 16, w // 16
-        n_patch = grid_h * grid_w
-        if n_tokens < n_patch:
-            raise ValueError(f"Invalid token count: got {n_tokens}, expected at least {n_patch}")
-        # Keep only the last n_patch tokens.
-        patch_tokens = dino_tokens[:, n_tokens - n_patch :, :]  # [B, grid_h*grid_w, C]
-        token_grid = patch_tokens.reshape(batch, grid_h, grid_w, channels).permute(0, 3, 1, 2)
-        return torch.nn.functional.interpolate(
-            token_grid, size=(h, w), mode="bilinear", align_corners=False
-        )
-    def forward(
-        self,
-        image: torch.Tensor,
-        depthmap: torch.Tensor,
-        dino_tokens: torch.Tensor,
-        intrinsics: torch.Tensor,
-        extrinsics: torch.Tensor,
-    ) -> Gaussians:
-        """Predict H*W Gaussians for each image in the batch."""
-        batch, _, height, width = image.shape
-        rgb_feat = self.rgb_encoder(image)
-        depth_feat = self.depth_encoder(depthmap)
-        dino_feat = self.dino_projector(self._tokens_to_feature_map(dino_tokens, height, width))
-        reg_feat = self.gaussian_regressor(torch.cat([rgb_feat, depth_feat, dino_feat], dim=1))
-        head_out = self.gaussian_head(torch.cat([reg_feat, rgb_feat, dino_feat], dim=1))  # [B, Cg, H, W]
-        per_pixel = head_out.permute(0, 2, 3, 1).reshape(batch, height * width, -1)  # [B, HW, Cg]
-        # One surface per pixel in this lightweight integration.
-        opacities = torch.sigmoid(per_pixel[..., 0])        # [B, HW]
-        offset_xy = torch.sigmoid(per_pixel[..., 1:3])      # [B, HW, 2], in [0,1]
-        raw_gaussians = per_pixel[..., 3:]                  # [B, HW, 7+3*d_sh]
-        pixel_centers = sample_image_grid(height, width, image.device).unsqueeze(0).expand(batch, -1, -1)
-        # Each Gaussian may move at most half a pixel away from its pixel centre.
-        coords = pixel_centers + (offset_xy - 0.5)
-        # NOTE(review): pixel centres reach w - 0.5 / h - 0.5 but the clamp stops at w - 1 / h - 1 — confirm this is intended.
-        coords[..., 0].clamp_(0.0, float(width - 1))
-        coords[..., 1].clamp_(0.0, float(height - 1))
-        depths = depthmap[:, 0].reshape(batch, -1)
-        return self.gaussian_adapter(
-            image=image,
-            extrinsics=extrinsics,
-            intrinsics=intrinsics,
-            coordinates_xy=coords,
-            depths=depths,
-            opacities=opacities,
-            raw_gaussians=raw_gaussians,
-        )

InfiniDepth/gs/projection.py DELETED Viewed

@@ -1,53 +0,0 @@
-import torch
-def homogenize_points(points: torch.Tensor) -> torch.Tensor:
-    """Append a homogeneous coordinate of 1 along the last dimension."""
-    w_column = torch.ones_like(points[..., :1])
-    return torch.cat((points, w_column), dim=-1)
-def homogenize_vectors(vectors: torch.Tensor) -> torch.Tensor:
-    """Append a homogeneous coordinate of 0 along the last dimension."""
-    w_column = torch.zeros_like(vectors[..., :1])
-    return torch.cat((vectors, w_column), dim=-1)
-def transform_cam2world(homogeneous: torch.Tensor, extrinsics: torch.Tensor) -> torch.Tensor:
-    """Apply camera-to-world matrices to homogeneous points or vectors.
-    homogeneous: [*batch, *extra, 4] homogeneous coordinates
-    extrinsics: [*batch, 4, 4] camera-to-world matrices, or any shape that
-        already broadcasts against homogeneous[..., None]
-    Returns a tensor with the same shape as `homogeneous`.
-    """
-    n_lead = extrinsics.dim() - 2
-    n_missing = (homogeneous.dim() - 1) - n_lead
-    # torch.matmul right-aligns batch dimensions, so a [B, 4, 4] stack against
-    # [B, N, 4] points would pair B with N. When the leading batch dimensions
-    # agree, insert singleton axes so each matrix applies to every point of its
-    # own batch entry. Other shapes fall through to plain broadcasting.
-    if n_lead > 0 and n_missing > 0 and extrinsics.shape[:n_lead] == homogeneous.shape[:n_lead]:
-        extrinsics = extrinsics[(slice(None),) * n_lead + (None,) * n_missing]
-    return torch.matmul(extrinsics, homogeneous.unsqueeze(-1)).squeeze(-1)
-def unproject(coordinates_xy: torch.Tensor, z: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
-    """Back-project pixel coordinates into camera space at depth z.
-    coordinates_xy: [B, N, 2] in pixel coordinates (x, y)
-    z: [B, N]
-    intrinsics: [B, 3, 3] in pixel units
-    """
-    pixels_h = homogenize_points(coordinates_xy)  # [B, N, 3]
-    k_inv = torch.linalg.inv(intrinsics)          # [B, 3, 3]
-    # Broadcast each batch's inverse intrinsics over its N pixels.
-    rays = (k_inv[:, None] @ pixels_h[..., None])[..., 0]
-    return rays * z[..., None]
-def get_world_rays(
-    coordinates_xy: torch.Tensor,
-    extrinsics: torch.Tensor,
-    intrinsics: torch.Tensor,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """Return world-space ray origins and directions.
-    coordinates_xy: [B, N, 2] in pixel coordinates (x, y)
-    extrinsics: [B, 4, 4] camera-to-world
-    intrinsics: [B, 3, 3] pixel intrinsics
-    Directions are scaled so their camera-space z component is 1 (not unit
-    length), so `origin + direction * depth` reaches the point at z-depth `depth`.
-    """
-    unit_depth = torch.ones_like(coordinates_xy[..., 0])
-    directions_cam = unproject(coordinates_xy, unit_depth, intrinsics)
-    directions_cam = directions_cam / torch.clamp(directions_cam[..., 2:], min=1e-6)
-    # Add a singleton point axis ([B, 1, 4, 4]) so matmul pairs each camera with
-    # its own N rays; a bare [B, 4, 4] would right-align B against N and fail
-    # (or mix cameras) for B > 1.
-    directions_world = transform_cam2world(homogenize_vectors(directions_cam), extrinsics[:, None])[..., :3]
-    # Every ray of a camera starts at that camera's translation.
-    origins_world = extrinsics[:, None, :3, 3].expand_as(directions_world)
-    return origins_world, directions_world
-def sample_image_grid(h: int, w: int, device: torch.device) -> torch.Tensor:
-    """Return pixel-centre coordinates, shape [H*W, 2], ordered (x, y), in row-major pixel order."""
-    row_centers = torch.arange(h, device=device, dtype=torch.float32) + 0.5
-    col_centers = torch.arange(w, device=device, dtype=torch.float32) + 0.5
-    # Row-major flattening: x cycles fastest, y stays constant across one image row.
-    xs = col_centers.repeat(h)
-    ys = row_centers.repeat_interleave(w)
-    return torch.stack([xs, ys], dim=-1)

InfiniDepth/gs/types.py DELETED Viewed

@@ -1,14 +0,0 @@
-from dataclasses import dataclass
-from typing import Optional
-import torch
-@dataclass
-class Gaussians:
-    """Container for a batch of 3D Gaussian primitives."""
-    means: torch.Tensor          # [B, N, 3]
-    harmonics: torch.Tensor      # [B, N, 3, d_sh]
-    opacities: torch.Tensor      # [B, N]
-    scales: torch.Tensor         # [B, N, 3]
-    rotations: torch.Tensor      # [B, N, 4]
-    # Optional explicit covariances; shape is not stated here — presumably [B, N, 3, 3], TODO confirm.
-    covariances: Optional[torch.Tensor] = None

InfiniDepth/utils/depth_video_utils.py DELETED Viewed

@@ -1,250 +0,0 @@
-import os
-from typing import Optional
-import cv2
-import numpy as np
-import torch
-from .inference_utils import default_dir_by_input_file, default_video_file_by_input
-from .io_utils import filter_depth_noise_numpy
-from .io_utils import save_sampled_point_clouds
-from .moge_utils import estimate_metric_depth_with_moge2
-from .sampling_utils import SAMPLING_METHODS
-from .vis_utils import colorize_depth_maps
-def prepare_rgb_frame(
-    frame_bgr: np.ndarray,
-    input_size: tuple[int, int],
-    device: torch.device,
-) -> tuple[torch.Tensor, torch.Tensor, tuple[int, int]]:
-    """Convert a BGR frame into tensors for the model.
-    Returns (full-resolution image tensor, resized image tensor on `device`,
-    original (height, width)). Both tensors are [1, 3, H, W] floats divided by 255;
-    only the resized one is moved to `device`.
-    """
-    def _to_unit_tensor(rgb: np.ndarray) -> torch.Tensor:
-        # HWC image -> [1, C, H, W] float tensor scaled by 1/255.
-        return torch.from_numpy(rgb).permute(2, 0, 1).unsqueeze(0).float() / 255.0
-    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
-    org_h, org_w = frame_rgb.shape[:2]
-    # input_size is reversed because cv2.resize takes (width, height).
-    target_wh = input_size[::-1]
-    resized_rgb = cv2.resize(frame_rgb, target_wh, interpolation=cv2.INTER_AREA)
-    org_img = _to_unit_tensor(frame_rgb)
-    image = _to_unit_tensor(resized_rgb).to(device)
-    return org_img, image, (org_h, org_w)
-def depth_frame_to_metric_depth(depth_frame: np.ndarray, depth_video_scale: float) -> np.ndarray:
-    """Convert a raw depth frame to float32 depth by dividing by `depth_video_scale`.
-    For 3-D frames only the first channel is used.
-    """
-    # Assume grayscale/depth-like content is stored in channels.
-    depth_raw = depth_frame[:, :, 0] if depth_frame.ndim == 3 else depth_frame
-    # Floor the divisor so a zero scale cannot divide by zero.
-    divisor = max(depth_video_scale, 1e-8)
-    return depth_raw.astype(np.float32) / divisor
-def sample_sparse_prompt(
-    depth: np.ndarray,
-    depth_mask: np.ndarray,
-    num_samples: int,
-) -> np.ndarray:
-    """Keep at most `num_samples` randomly chosen valid depth values and zero the rest.
-    A pixel is a candidate when `depth * depth_mask` exceeds 0.1. If there are no
-    more candidates than `num_samples`, the masked depth is returned unchanged.
-    Sampling uses NumPy's global random state. The inputs are not modified.
-    """
-    valid_depth = depth * depth_mask
-    # Negative requests are treated as zero instead of slicing from the end.
-    num_samples = max(int(num_samples), 0)
-    # flatnonzero always yields a 1-D index array, even for a single candidate.
-    candidates = np.flatnonzero(valid_depth > 0.1)
-    if candidates.size <= num_samples:
-        return valid_depth
-    keep = np.random.permutation(candidates)[:num_samples]
-    flat = valid_depth.reshape(-1)
-    sparse = np.zeros_like(flat)
-    sparse[keep] = flat[keep]
-    return sparse.reshape(valid_depth.shape)
-def prepare_prompt_from_depth_frame(
-    depth_frame: np.ndarray,
-    input_size: tuple[int, int],
-    depth_video_scale: float,
-    num_samples: int,
-    min_prompt: float,
-    max_prompt: float,
-    enable_noise_filter: bool,
-    filter_std_threshold: float,
-    filter_median_threshold: float,
-    filter_gradient_threshold: float,
-    filter_min_neighbors: int,
-    device: torch.device,
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-    """Build the dense depth, sparse prompt and validity mask tensors from a depth frame.
-    Returns (depth, prompt_depth, depth_mask), each a [1, 1, H, W] float tensor on `device`.
-    """
-    def _as_batched(array: np.ndarray) -> torch.Tensor:
-        # [H, W] array -> [1, 1, H, W] float tensor on the target device.
-        return torch.from_numpy(array).unsqueeze(0).unsqueeze(0).float().to(device)
-    depth = depth_frame_to_metric_depth(depth_frame, depth_video_scale)
-    # Nearest-neighbour resize avoids blending depths across object boundaries.
-    depth = cv2.resize(depth, input_size[::-1], interpolation=cv2.INTER_NEAREST)
-    # Valid pixels lie strictly inside (min_prompt, max_prompt).
-    depth_mask = ((depth > min_prompt) & (depth < max_prompt)).astype(np.float32)
-    if enable_noise_filter:
-        depth, depth_mask = filter_depth_noise_numpy(
-            depth=depth,
-            depth_mask=depth_mask,
-            std_threshold=filter_std_threshold,
-            median_threshold=filter_median_threshold,
-            gradient_threshold=filter_gradient_threshold,
-            min_neighbors=filter_min_neighbors,
-        )
-    prompt_depth = sample_sparse_prompt(depth, depth_mask, num_samples=num_samples)
-    return _as_batched(depth), _as_batched(prompt_depth), _as_batched(depth_mask)
-def ensure_depth_map(pred_depth: torch.Tensor, h_sample: int, w_sample: int) -> torch.Tensor:
-    """Coerce a depth prediction into a map with spatial size (h_sample, w_sample).
-    A 4-D tensor whose last two dimensions already match is returned unchanged.
-    3-D inputs shaped [B, H*W, 1], [B, 1, H*W] or [B, H, W] become [B, 1, H, W].
-    Raises ValueError for any other shape.
-    """
-    target_hw = (h_sample, w_sample)
-    n_pixels = h_sample * w_sample
-    if pred_depth.ndim == 4 and pred_depth.shape[-2:] == target_hw:
-        return pred_depth
-    if pred_depth.ndim == 3:
-        batch = pred_depth.shape[0]
-        tail = tuple(pred_depth.shape[1:])
-        # The checks run in this order, which matters when shapes are ambiguous (e.g. H*W == 1).
-        if tail == (n_pixels, 1):
-            return pred_depth.permute(0, 2, 1).reshape(batch, 1, h_sample, w_sample)
-        if tail == (1, n_pixels):
-            return pred_depth.reshape(batch, 1, h_sample, w_sample)
-        if tail == target_hw:
-            return pred_depth.unsqueeze(1)
-    raise ValueError(
-        f"Unsupported pred_depth shape: {tuple(pred_depth.shape)} for target ({h_sample}, {w_sample})"
-    )
-def build_query_coords(
-    h_sample: int,
-    w_sample: int,
-    device: torch.device,
-) -> torch.Tensor:
-    """Build the "2d_uniform" query coordinates for an (h_sample, w_sample) grid, with a leading batch axis, on `device`."""
-    uniform_sampler = SAMPLING_METHODS["2d_uniform"]
-    coords = uniform_sampler((h_sample, w_sample))
-    return coords.unsqueeze(0).to(device)
-def resolve_video_output_paths(
-    input_video_path: str,
-    depth_output_video_path: Optional[str],
-    pcd_output_dir: Optional[str],
-    save_depth_video: bool,
-    save_pcd: bool,
-) -> tuple[str, str]:
-    """Resolve the depth-video path and point-cloud directory, creating directories that will be written to.
-    Missing (falsy) paths fall back to defaults derived from `input_video_path`.
-    Returns (depth video path, point-cloud output directory).
-    """
-    if depth_output_video_path:
-        video_path = depth_output_video_path
-    else:
-        video_path = default_video_file_by_input(
-            input_video_path,
-            "pred_depth_video",
-            "pred_depth.mp4",
-        )
-    if pcd_output_dir:
-        pcd_dir = pcd_output_dir
-    else:
-        pcd_dir = default_dir_by_input_file(input_video_path, "pred_pcd_frames")
-    if save_depth_video:
-        # A bare file name has no directory part; use the current directory then.
-        video_dir = os.path.dirname(video_path) or "."
-        os.makedirs(video_dir, exist_ok=True)
-    if save_pcd:
-        os.makedirs(pcd_dir, exist_ok=True)
-    return video_path, pcd_dir
-def prepare_video_prompt(
-    depth_frame: Optional[np.ndarray],
-    image: torch.Tensor,
-    input_size: tuple[int, int],
-    depth_video_scale: float,
-    num_samples: int,
-    min_prompt: float,
-    max_prompt: float,
-    enable_noise_filter: bool,
-    filter_std_threshold: float,
-    filter_median_threshold: float,
-    filter_gradient_threshold: float,
-    filter_min_neighbors: int,
-    moge2_pretrained: str,
-    moge2_use_fp16: bool,
-    moge2_resolution_level: int,
-    moge2_num_tokens: Optional[int],
-    moge2_threshold: float,
-    device: torch.device,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """Produce the depth prompt and validity mask for one video frame.
-    With a depth frame, the prompt is sampled from it; without one, depth is
-    estimated from `image` via estimate_metric_depth_with_moge2.
-    Returns (prompt_depth, depth_mask).
-    """
-    if depth_frame is None:
-        # No depth frame available: fall back to an estimate from the RGB image.
-        estimated_depth, estimated_mask = estimate_metric_depth_with_moge2(
-            image=image,
-            pretrained_model_name_or_path=moge2_pretrained,
-            use_fp16=moge2_use_fp16,
-            resolution_level=moge2_resolution_level,
-            num_tokens=moge2_num_tokens,
-            threshold=moge2_threshold,
-        )
-        return estimated_depth.to(device), estimated_mask.to(device)
-    # The dense depth returned first is not needed for the prompt.
-    _, prompt_depth, depth_mask = prepare_prompt_from_depth_frame(
-        depth_frame=depth_frame,
-        input_size=input_size,
-        depth_video_scale=depth_video_scale,
-        num_samples=num_samples,
-        min_prompt=min_prompt,
-        max_prompt=max_prompt,
-        enable_noise_filter=enable_noise_filter,
-        filter_std_threshold=filter_std_threshold,
-        filter_median_threshold=filter_median_threshold,
-        filter_gradient_threshold=filter_gradient_threshold,
-        filter_min_neighbors=filter_min_neighbors,
-        device=device,
-    )
-    return prompt_depth, depth_mask
-def write_depth_video_frame(
-    pred_depthmap: torch.Tensor,
-    depth_writer: Optional[cv2.VideoWriter],
-    writer_size: Optional[tuple[int, int]],
-    depth_output_video_path: str,
-    final_fps: float,
-) -> tuple[cv2.VideoWriter, tuple[int, int]]:
-    """Colorize one predicted depth map and append it to the depth video.
-    The writer is created on the first call (when `depth_writer` is None) using the
-    size of that first frame; later frames are resized to the writer size if needed.
-    Returns the (possibly new) writer and its (width, height).
-    Raises RuntimeError if the writer cannot be opened or has no known size.
-    """
-    depth_np = pred_depthmap[0, 0].detach().cpu().numpy()
-    valid = np.isfinite(depth_np) & (depth_np > 0)
-    # Colour range: 1st to 99th percentile of the valid depths, or [0, 1] when none are valid.
-    depth_min, depth_max = 0.0, 1.0
-    if np.any(valid):
-        depth_min, depth_max = np.percentile(depth_np[valid], [1.0, 99.0]).tolist()
-        if depth_max <= depth_min:
-            depth_max = depth_min + 1e-6
-    vis_rgb = colorize_depth_maps(depth_np, min_depth=depth_min, max_depth=depth_max, cmap="Spectral")
-    vis_bgr = cv2.cvtColor(vis_rgb, cv2.COLOR_RGB2BGR)
-    frame_size = (vis_bgr.shape[1], vis_bgr.shape[0])  # (width, height)
-    if depth_writer is None:
-        writer_size = frame_size
-        codec = cv2.VideoWriter_fourcc(*"mp4v")
-        depth_writer = cv2.VideoWriter(depth_output_video_path, codec, float(final_fps), writer_size)
-        if not depth_writer.isOpened():
-            raise RuntimeError(f"Failed to open depth video writer: {depth_output_video_path}")
-    if writer_size is None:
-        raise RuntimeError("writer_size should not be None after depth_writer initialization.")
-    if frame_size != writer_size:
-        vis_bgr = cv2.resize(vis_bgr, writer_size, interpolation=cv2.INTER_AREA)
-    depth_writer.write(vis_bgr)
-    return depth_writer, writer_size
-def save_video_frame_point_cloud(
-    query_2d_uniform_coord: torch.Tensor,
-    pred_2d_uniform_depth: torch.Tensor,
-    image: torch.Tensor,
-    fx: float,
-    fy: float,
-    cx: float,
-    cy: float,
-    pcd_output_dir: str,
-    frame_id: int,
-    enable_filter_flying_points: bool,
-) -> str:
-    """Save one frame's sampled points as `frame_<6-digit id>.ply` in `pcd_output_dir` and return that path."""
-    file_name = f"frame_{frame_id:06d}.ply"
-    pcd_save_path = os.path.join(pcd_output_dir, file_name)
-    # Drop the batch axis (and the trailing depth axis) and move everything to the CPU.
-    coords_cpu = query_2d_uniform_coord.squeeze(0).detach().cpu()
-    depth_cpu = pred_2d_uniform_depth.squeeze(0).squeeze(-1).detach().cpu()
-    rgb_cpu = image.squeeze(0).detach().cpu()
-    save_sampled_point_clouds(
-        sampled_coord=coords_cpu,
-        sampled_depth=depth_cpu,
-        rgb_image=rgb_cpu,
-        fx=fx,
-        fy=fy,
-        cx=cx,
-        cy=cy,
-        output_path=pcd_save_path,
-        filter_flying_points=enable_filter_flying_points,
-    )
-    return pcd_save_path

InfiniDepth/utils/gs_utils.py DELETED Viewed

@@ -1,289 +0,0 @@
-import math
-import os
-from typing import Optional
-import imageio.v2 as imageio
-import numpy as np
-import torch
-import torch.nn.functional as F
-from InfiniDepth.gs import Gaussians
-from InfiniDepth.gs.projection import homogenize_points, transform_cam2world, unproject
-def _safe_normalize(v: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
-    """Return ``v`` divided by its norm, with the norm clamped to at least ``eps``.
-
-    The clamp avoids a division by zero for (near-)zero vectors; such inputs
-    come back almost unchanged instead of as NaN/Inf.
-    """
-    # NOTE(review): torch.norm is called without ``dim``, so a single norm over
-    # all elements is used. That is fine for the single 3-vectors passed in this
-    # file, but a batched input would not be normalised per vector.
-    return v / torch.clamp(torch.norm(v), min=eps)
-def _look_at_c2w(position: torch.Tensor, target: torch.Tensor, up_hint: torch.Tensor) -> torch.Tensor:
-    """Build a 4x4 camera-to-world matrix located at ``position`` and facing ``target``.
-
-    The rotation columns are written as ``[right, up, forward]`` and the fourth
-    column holds ``position``. The result uses the device and dtype of
-    ``position``.
-
-    Args:
-        position: Camera centre (3-vector).
-        target: Point the camera looks at (3-vector).
-        up_hint: Approximate up direction used to derive the right axis.
-
-    Returns:
-        A 4x4 tensor with the basis vectors and position filled in.
-    """
-    forward = _safe_normalize(target - position)
-    # Camera basis is stored as [right, up, forward]. Using cross(forward, up)
-    # flips the x-axis and produces a horizontally mirrored render. Keep the
-    # original up hint and derive a right-handed basis instead.
-    right = torch.cross(up_hint, forward, dim=0)
-    # If the up hint is (nearly) parallel to the viewing direction the cross
-    # product vanishes; fall back to the fixed axis [1, 0, 0] as the hint.
-    # NOTE(review): if forward is also parallel to [1, 0, 0] this fallback is
-    # degenerate too and ``right`` stays near zero after normalisation.
-    if torch.norm(right) < 1e-6:
-        right = torch.cross(
-            torch.tensor([1.0, 0.0, 0.0], device=position.device, dtype=position.dtype),
-            forward,
-            dim=0,
-        )
-    right = _safe_normalize(right)
-    # Recompute up from forward and right so the three axes are orthogonal.
-    up = _safe_normalize(torch.cross(forward, right, dim=0))
-    c2w = torch.eye(4, device=position.device, dtype=position.dtype)
-    c2w[:3, 0] = right
-    c2w[:3, 1] = up
-    c2w[:3, 2] = forward
-    c2w[:3, 3] = position
-    return c2w
-def _build_orbit_poses(
-    base_c2w: torch.Tensor,
-    target: torch.Tensor,
-    num_frames: int,
-    radius: float,
-    vertical: float,
-    forward_amp: float,
-) -> list[torch.Tensor]:
-    """Generate a closed loop of camera poses around the base camera, all facing ``target``.
-
-    For frame ``i`` of ``n`` the angle is ``theta = 2*pi*i/n`` and the camera
-    centre is the base position plus
-    ``right*radius*sin(theta) + up*vertical*sin(2*theta)
-    + forward*forward_amp*0.5*(1 - cos(theta))``,
-    where right/up/forward are columns 0/1/2 of ``base_c2w``.
-
-    Args:
-        base_c2w: 4x4 camera-to-world matrix of the reference camera.
-        target: Point every generated camera looks at.
-        num_frames: Requested number of poses; at least 2 are always produced.
-        radius: Amplitude of the sideways motion.
-        vertical: Amplitude of the up/down motion.
-        forward_amp: Peak displacement along the forward axis (reached at theta = pi).
-
-    Returns:
-        A list of ``max(2, int(num_frames))`` 4x4 camera-to-world matrices.
-    """
-    base_pos = base_c2w[:3, 3]
-    right = base_c2w[:3, 0]
-    up = base_c2w[:3, 1]
-    forward = base_c2w[:3, 2]
-    poses: list[torch.Tensor] = []
-    n = max(2, int(num_frames))
-    for i in range(n):
-        # theta never reaches 2*pi, so the last pose does not repeat the first.
-        theta = 2.0 * math.pi * float(i) / float(n)
-        offset = (
-            right * (radius * math.sin(theta))
-            + up * (vertical * math.sin(2.0 * theta))
-            + forward * (forward_amp * 0.5 * (1.0 - math.cos(theta)))
-        )
-        pos = base_pos + offset
-        # The base camera's up axis serves as the up hint for every pose.
-        poses.append(_look_at_c2w(pos, target, up))
-    return poses
-def _build_swing_poses(
-    base_c2w: torch.Tensor,
-    num_frames: int,
-    radius: float,
-    forward_amp: float,
-) -> list[torch.Tensor]:
-    """Generate translation-only poses that swing left, right, forward and back to the start.
-
-    The camera centre is linearly interpolated between five key offsets
-    (origin, ``-right*radius``, ``+right*radius``, ``forward*forward_amp``,
-    origin). The rotation of ``base_c2w`` is kept unchanged for every pose.
-
-    Args:
-        base_c2w: 4x4 camera-to-world matrix of the reference camera.
-        num_frames: Requested frame budget; each of the four segments gets
-            ``max(1, int(num_frames) // 4)`` samples.
-        radius: Sideways displacement of the left/right key positions.
-        forward_amp: Forward displacement of the forward key position.
-
-    Returns:
-        A list of 4x4 camera-to-world matrices.
-    """
-    base_pos = base_c2w[:3, 3]
-    right = base_c2w[:3, 0]
-    forward = base_c2w[:3, 2]
-    key_offsets = [
-        torch.zeros(3, device=base_pos.device, dtype=base_pos.dtype),
-        -right * radius,
-        right * radius,
-        forward * forward_amp,
-        torch.zeros(3, device=base_pos.device, dtype=base_pos.dtype),
-    ]
-    poses: list[torch.Tensor] = []
-    seg_frames = max(1, int(num_frames) // (len(key_offsets) - 1))
-    # NOTE(review): the number of poses returned is 4 * seg_frames - 3, not
-    # num_frames. When seg_frames == 1 (num_frames < 8) alpha is fixed at 1.0 and
-    # segments 1..3 are skipped entirely, so the result is a single pose at
-    # base_pos - right * radius. Confirm this is intended.
-    for seg in range(len(key_offsets) - 1):
-        p0 = base_pos + key_offsets[seg]
-        p1 = base_pos + key_offsets[seg + 1]
-        for i in range(seg_frames):
-            alpha = 1.0 if seg_frames == 1 else float(i) / float(seg_frames - 1)
-            pos = (1.0 - alpha) * p0 + alpha * p1
-            pose = base_c2w.clone()
-            pose[:3, 3] = pos
-            # The first sample of a later segment equals the last sample of the
-            # previous one, so it is dropped to avoid a duplicated frame.
-            if seg > 0 and i == 0:
-                continue
-            poses.append(pose)
-    return poses
-def _scale_intrinsics_for_render(
-    intrinsics: torch.Tensor,
-    src_h: int,
-    src_w: int,
-    dst_h: int,
-    dst_w: int,
-) -> torch.Tensor:
-    """Return a copy of ``intrinsics`` rescaled from the source to the render resolution.
-
-    Entries ``[0, 0]`` and ``[0, 2]`` are multiplied by ``dst_w / src_w`` and
-    entries ``[1, 1]`` and ``[1, 2]`` by ``dst_h / src_h``. The input tensor is
-    cloned first and is not modified.
-
-    Args:
-        intrinsics: Matrix indexed as ``[row, col]``; presumably a 3x3 pinhole
-            matrix in pixel units -- confirm against callers.
-        src_h, src_w: Resolution the intrinsics currently refer to.
-        dst_h, dst_w: Resolution the result should refer to.
-    """
-    scaled = intrinsics.clone()
-    sx = float(dst_w) / float(src_w)
-    sy = float(dst_h) / float(src_h)
-    scaled[0, 0] *= sx
-    scaled[1, 1] *= sy
-    scaled[0, 2] *= sx
-    scaled[1, 2] *= sy
-    return scaled
-def _render_gaussian_frame(
-    rasterization_fn,
-    means: torch.Tensor,
-    harmonics: torch.Tensor,
-    opacities: torch.Tensor,
-    scales: torch.Tensor,
-    rotations: torch.Tensor,
-    c2w: torch.Tensor,
-    intrinsics: torch.Tensor,
-    render_h: int,
-    render_w: int,
-    bg_color: tuple[float, float, float],
-) -> np.ndarray:
-    """Rasterize the gaussians from one camera pose and return the colour image.
-
-    All tensor inputs get a leading batch dimension and are cast to float32
-    before being passed to ``rasterization_fn``. The spherical-harmonics degree
-    is derived from the number of SH coefficients found in ``harmonics``.
-
-    Args:
-        rasterization_fn: Rasterizer callable (``gsplat.rasterization`` in
-            ``_render_novel_video``).
-        means, harmonics, opacities, scales, rotations: Per-gaussian parameters
-            without a batch dimension (see the shape comments below).
-        c2w: 4x4 camera-to-world matrix; it is inverted to obtain world-to-camera.
-        intrinsics: Camera matrix for the render resolution.
-        render_h, render_w: Output image size in pixels.
-        bg_color: Background colour as three floats.
-
-    Returns:
-        A uint8 NumPy array holding the first three channels of the render,
-        clamped to ``[0, 1]`` and scaled by 255.
-    """
-    xyzs = means.unsqueeze(0).float()  # [1, N, 3]
-    opacitys = opacities.unsqueeze(0).float()  # [1, N]
-    rotations_b = rotations.unsqueeze(0).float()  # [1, N, 4]
-    scales_b = scales.unsqueeze(0).float()  # [1, N, 3]
-    # [N, 3, d_sh] -> [1, N, d_sh, 3]
-    features = harmonics.unsqueeze(0).permute(0, 1, 3, 2).contiguous().float()
-    d_sh = features.shape[-2]
-    # d_sh == (degree + 1) ** 2, so the degree is sqrt(d_sh) - 1.
-    sh_degree = int(round(math.sqrt(float(d_sh)) - 1.0))
-    w2c = torch.linalg.inv(c2w).unsqueeze(0).unsqueeze(0).float()  # [1, 1, 4, 4]
-    Ks = intrinsics.unsqueeze(0).unsqueeze(0).float()  # [1, 1, 3, 3]
-    backgrounds = torch.tensor(bg_color, dtype=torch.float32, device=xyzs.device).view(1, 1, 3)
-    # Width is passed before height. Only the first of the three returned values
-    # (the rendered image) is used.
-    rendering, _, _ = rasterization_fn(
-        xyzs,
-        rotations_b,
-        scales_b,
-        opacitys,
-        features,
-        w2c,
-        Ks,
-        render_w,
-        render_h,
-        sh_degree=sh_degree,
-        render_mode="RGB+D",
-        packed=False,
-        backgrounds=backgrounds,
-        covars=None,
-        eps2d=1e-8,
-    )
-    # "RGB+D" is requested, but only the first three channels are kept here.
-    rgb = rendering[0, 0, :, :, :3].clamp(0.0, 1.0)
-    # NOTE(review): the float -> uint8 cast presumably truncates rather than
-    # rounds, so values are biased down by up to one level -- confirm if it matters.
-    return (rgb * 255.0).to(torch.uint8).cpu().numpy()
-def _render_novel_video(
-    means: torch.Tensor,
-    harmonics: torch.Tensor,
-    opacities: torch.Tensor,
-    scales: torch.Tensor,
-    rotations: torch.Tensor,
-    base_c2w: torch.Tensor,
-    intrinsics: torch.Tensor,
-    render_h: int,
-    render_w: int,
-    video_path: str,
-    trajectory: str,
-    num_frames: int,
-    fps: int,
-    radius: float,
-    vertical: float,
-    forward_amp: float,
-    bg_color: tuple[float, float, float],
-) -> None:
-    """Render the gaussians along a camera trajectory and write the frames to a video.
-
-    Args:
-        means, harmonics, opacities, scales, rotations: Per-gaussian parameters
-            forwarded to ``_render_gaussian_frame``.
-        base_c2w: 4x4 camera-to-world matrix the trajectory is built around.
-        intrinsics: Camera matrix for the render resolution.
-        render_h, render_w: Output frame size in pixels.
-        video_path: Destination file; its parent directory is created if needed.
-        trajectory: ``"swing"`` selects ``_build_swing_poses``; any other value
-            selects ``_build_orbit_poses``.
-        num_frames: Frame budget passed to the trajectory builder.
-        fps: Frame rate of the video; values below 1 are raised to 1.
-        radius, vertical, forward_amp: Trajectory amplitudes (``vertical`` is
-            only used by the orbit trajectory).
-        bg_color: Background colour as three floats.
-
-    Raises:
-        RuntimeError: If ``gsplat`` cannot be imported, or if anything inside
-            the rendering/writing loop fails.
-    """
-    # gsplat is imported lazily so that this module can be imported without it.
-    try:
-        from gsplat import rasterization as rasterization_fn
-    except ImportError as exc:
-        raise RuntimeError("Novel-view rendering requires gsplat. Please install gsplat first.") from exc
-    # The orbit trajectory looks at the centroid of all gaussian means.
-    target = means.mean(dim=0)
-    if trajectory == "swing":
-        poses = _build_swing_poses(base_c2w, num_frames, radius, forward_amp)
-    else:
-        poses = _build_orbit_poses(base_c2w, target, num_frames, radius, vertical, forward_amp)
-    video_dir = os.path.dirname(video_path)
-    if video_dir:
-        os.makedirs(video_dir, exist_ok=True)
-    # NOTE(review): the broad ``except Exception`` also catches rasterization
-    # errors raised by _render_gaussian_frame, so the "Failed to write video"
-    # message can hide a rendering problem; the real cause is only available
-    # through the chained exception.
-    try:
-        with imageio.get_writer(
-            video_path,
-            fps=float(max(1, fps)),
-            codec="libx264",
-            macro_block_size=1,
-        ) as writer:
-            for pose in poses:
-                frame_rgb = _render_gaussian_frame(
-                    rasterization_fn=rasterization_fn,
-                    means=means,
-                    harmonics=harmonics,
-                    opacities=opacities,
-                    scales=scales,
-                    rotations=rotations,
-                    c2w=pose,
-                    intrinsics=intrinsics,
-                    render_h=render_h,
-                    render_w=render_w,
-                    bg_color=bg_color,
-                )
-                writer.append_data(frame_rgb)
-    except Exception as exc:
-        raise RuntimeError(f"Failed to write video with imageio: {video_path}") from exc
-def _build_sparse_uniform_gaussians(
-    dense_gaussians,
-    query_3d_uniform_coord: torch.Tensor,
-    pred_depth_3d: torch.Tensor,
-    intrinsics: torch.Tensor,
-    extrinsics: torch.Tensor,
-    h: int,
-    w: int,
-) -> Gaussians:
-    """Convert dense pixel gaussians to sparse 3d-uniform gaussians.
-
-    The sparse means are obtained by unprojecting the query coordinates with
-    their predicted depths and transforming them to world space. Harmonics,
-    opacities, scales and rotations are bilinearly sampled from the dense
-    ``h`` x ``w`` attribute maps at the same query coordinates, and the sampled
-    rotations are re-normalised afterwards.
-
-    Args:
-        dense_gaussians: Dense gaussians whose attributes are laid out as
-            ``h * w`` entries per batch item; only batch size 1 is supported.
-        query_3d_uniform_coord: Normalised query coordinates in ``[y, x]``
-            order; only batch entry 0 is used.
-        pred_depth_3d: Depth per query coordinate; only batch entry 0 is used.
-        intrinsics: Camera intrinsics passed to ``unproject``.
-        extrinsics: Camera pose passed to ``transform_cam2world``.
-        h, w: Height and width of the dense attribute maps.
-
-    Returns:
-        A ``Gaussians`` instance with ``covariances=None``.
-
-    Raises:
-        ValueError: If the batch size is not 1 or an attribute has an
-            unsupported number of dimensions.
-    """
-    if dense_gaussians.means.shape[0] != 1:
-        raise ValueError("Current strict-aligned sparse interpolation only supports batch size 1.")
-    sparse_coords_normalized = query_3d_uniform_coord[0]  # [N,2], [y,x]
-    sparse_depths = pred_depth_3d[0]  # [N,1]
-    # Convert normalized coordinates to pixel coordinates
-    p_y = ((sparse_coords_normalized[:, 0] + 1.0) * (h / 2.0)) - 0.5
-    p_x = ((sparse_coords_normalized[:, 1] + 1.0) * (w / 2.0)) - 0.5
-    xy_coords = torch.stack([p_x, p_y], dim=-1)  # [N,2], [x,y]
-    depth_values = sparse_depths.squeeze(-1)
-    camera_points = unproject(xy_coords.unsqueeze(0), depth_values.unsqueeze(0), intrinsics)[0]
-    camera_points_hom = homogenize_points(camera_points)
-    world_points = transform_cam2world(camera_points_hom.unsqueeze(0), extrinsics)[0]
-    sparse_pts_world = world_points[..., :3]  # [N,3]
-    # grid_sample expects (x, y) order, so the [y, x] coordinates are swapped.
-    grid = sparse_coords_normalized[:, [1, 0]].unsqueeze(0).unsqueeze(0)  # [1,1,N,2]
-    def sample_attribute(attr):
-        # Reshape the flattened attribute to an image-like tensor, sample it at
-        # the query grid, and bring the result back to a per-point layout.
-        if attr.dim() == 2:
-            attr_spatial = attr.view(1, 1, h, w)
-            sampled = F.grid_sample(attr_spatial, grid, mode="bilinear", align_corners=False)
-            return sampled.squeeze(0).squeeze(0)
-        if attr.dim() == 3:
-            d = attr.shape[-1]
-            attr_spatial = attr.view(1, h, w, d).permute(0, 3, 1, 2)
-            sampled = F.grid_sample(attr_spatial, grid, mode="bilinear", align_corners=False)
-            return sampled.squeeze(2).permute(0, 2, 1)
-        if attr.dim() == 4:
-            # The two trailing dimensions are merged into channels for sampling
-            # and split again afterwards.
-            d1, d2 = attr.shape[-2:]
-            attr_flat = attr.view(1, h, w, d1 * d2).permute(0, 3, 1, 2)
-            sampled = F.grid_sample(attr_flat, grid, mode="bilinear", align_corners=False)
-            return sampled.squeeze(2).permute(0, 2, 1).view(1, -1, d1, d2)
-        raise ValueError(f"Unsupported attribute dimension: {attr.dim()}")
-    sparse_harmonics = sample_attribute(dense_gaussians.harmonics)
-    sparse_opacities = sample_attribute(dense_gaussians.opacities)
-    sparse_scales = sample_attribute(dense_gaussians.scales)
-    sparse_rotations = sample_attribute(dense_gaussians.rotations)
-    # Bilinear blending does not preserve length, so rescale the sampled
-    # rotations to unit norm along the last dimension.
-    sparse_rotations = sparse_rotations / (torch.norm(sparse_rotations, dim=-1, keepdim=True) + 1e-8)
-    return Gaussians(
-        means=sparse_pts_world.unsqueeze(0),
-        covariances=None,
-        harmonics=sparse_harmonics,
-        opacities=sparse_opacities,
-        scales=sparse_scales,
-        rotations=sparse_rotations,
-    )

InfiniDepth/utils/inference_utils.py CHANGED Viewed

@@ -6,8 +6,6 @@ import cv2
 import torch
 import torch.nn.functional as F
-from InfiniDepth.gs import Gaussians
 from .io_utils import load_depth
 from .moge_utils import estimate_metric_depth_with_moge2
 from .vis_utils import build_sky_model, run_skyseg
@@ -331,67 +329,4 @@ def build_camera_matrices(
         device=device,
     ).unsqueeze(0).expand(batch, -1, -1)
     extrinsics = torch.eye(4, dtype=torch.float32, device=device).unsqueeze(0).expand(batch, -1, -1)
-    return fx, fy, cx, cy, intrinsics, extrinsics
-def filter_gaussians_by_depth_ratio(
-    pixel_gaussians: Gaussians,
-    extrinsics: torch.Tensor,
-    keep_far_ratio: float,
-) -> tuple[Gaussians, int, int, float, float]:
-    """Drop gaussians farther from the camera than a fraction of the maximum distance.
-
-    The distance of every gaussian mean (batch entry 0) to the translation part
-    of ``extrinsics[0]`` is computed; gaussians with a distance greater than
-    ``max_distance * keep_far_ratio`` are removed.
-
-    Args:
-        pixel_gaussians: Gaussians to filter.
-        extrinsics: Batched 4x4 matrices; ``extrinsics[0, :3, 3]`` is taken as
-            the camera position.
-        keep_far_ratio: Fraction of the maximum distance used as the threshold.
-
-    Returns:
-        ``(filtered_gaussians, num_filtered, num_kept, depth_threshold,
-        max_depth)``; the filtered gaussians have ``covariances=None``.
-    """
-    # NOTE(review): using the translation column as the camera position assumes
-    # ``extrinsics`` is camera-to-world -- confirm against callers.
-    camera_position = extrinsics[0, :3, 3]
-    gaussian_means = pixel_gaussians.means[0]
-    # Despite the "depth" naming this is the Euclidean distance to the camera
-    # centre, not the z-depth along the viewing axis.
-    distances = torch.norm(gaussian_means - camera_position.unsqueeze(0), dim=-1)
-    # NOTE(review): presumably raises when there are no gaussians, because
-    # ``max`` of an empty tensor is undefined -- confirm whether that can occur.
-    max_depth = distances.max()
-    depth_threshold = max_depth * keep_far_ratio
-    near_mask = distances <= depth_threshold
-    num_filtered = int((~near_mask).sum().item())
-    num_kept = int(near_mask.sum().item())
-    # The mask is derived from batch entry 0 and applied along dimension 1 of
-    # every attribute.
-    filtered_gaussians = Gaussians(
-        means=pixel_gaussians.means[:, near_mask, :],
-        covariances=None,
-        harmonics=pixel_gaussians.harmonics[:, near_mask, :, :],
-        opacities=pixel_gaussians.opacities[:, near_mask],
-        scales=pixel_gaussians.scales[:, near_mask, :],
-        rotations=pixel_gaussians.rotations[:, near_mask, :],
-    )
-    return filtered_gaussians, num_filtered, num_kept, float(depth_threshold.item()), float(max_depth.item())
-def filter_gaussians_by_min_opacity(pixel_gaussians: Gaussians, min_opacity: float) -> Gaussians:
-    """Keep only gaussians whose opacity is at least ``min_opacity``.
-
-    When ``min_opacity`` is zero or negative the input object is returned
-    unchanged (same instance). Otherwise a new ``Gaussians`` is built with
-    ``covariances=None``.
-    """
-    if min_opacity <= 0.0:
-        return pixel_gaussians
-    # The mask comes from batch entry 0 and is applied along dimension 1 of
-    # every attribute.
-    keep = pixel_gaussians.opacities[0] >= min_opacity
-    return Gaussians(
-        means=pixel_gaussians.means[:, keep, :],
-        covariances=None,
-        harmonics=pixel_gaussians.harmonics[:, keep, :, :],
-        opacities=pixel_gaussians.opacities[:, keep],
-        scales=pixel_gaussians.scales[:, keep, :],
-        rotations=pixel_gaussians.rotations[:, keep, :],
-    )
-def unpack_gaussians_for_export(
-    pixel_gaussians: Gaussians,
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-    """Return batch entry 0 of each gaussian attribute as a tuple.
-
-    The order is ``(means, harmonics, opacities, scales, rotations)``;
-    ``covariances`` is not included.
-    """
-    return (
-        pixel_gaussians.means[0],
-        pixel_gaussians.harmonics[0],
-        pixel_gaussians.opacities[0],
-        pixel_gaussians.scales[0],
-        pixel_gaussians.rotations[0],
-    )
-def resolve_ply_output_path(
-    input_image_path: str,
-    model_type: str,
-    output_ply_dir: Optional[str] = None,
-    output_ply_name: Optional[str] = None,
-) -> tuple[str, str]:
-    """Determine (and create) the output directory and file path for a gaussian PLY export.
-
-    Args:
-        input_image_path: Path of the input image; its base name without
-            extension is used in the default file name.
-        model_type: Prefix of the default file name.
-        output_ply_dir: Target directory. When empty or ``None`` the directory
-            comes from ``default_dir_by_input_file(input_image_path, "pred_gs")``.
-        output_ply_name: File name. When empty or ``None`` it defaults to
-            ``"<model_type>_<stem>_gaussians.ply"``.
-
-    Returns:
-        ``(ply_dir, ply_path)`` where ``ply_path`` is ``ply_dir`` joined with
-        the file name.
-    """
-    ply_dir = output_ply_dir or default_dir_by_input_file(input_image_path, "pred_gs")
-    # Side effect: the directory is created here if it does not exist yet.
-    os.makedirs(ply_dir, exist_ok=True)
-    stem = os.path.splitext(os.path.basename(input_image_path))[0]
-    ply_name = output_ply_name or f"{model_type}_{stem}_gaussians.ply"
-    return ply_dir, os.path.join(ply_dir, ply_name)

 import torch
 import torch.nn.functional as F
 from .io_utils import load_depth
 from .moge_utils import estimate_metric_depth_with_moge2
 from .vis_utils import build_sky_model, run_skyseg
         device=device,
     ).unsqueeze(0).expand(batch, -1, -1)
     extrinsics = torch.eye(4, dtype=torch.float32, device=device).unsqueeze(0).expand(batch, -1, -1)
+    return fx, fy, cx, cy, intrinsics, extrinsics