Spaces:

ritianyu
/

InfiniDepth

Running on Zero

App Files Community

ritianyu committed about 1 month ago

Commit

b6640e8

1 Parent(s): 5e71ded

update

Browse files

Files changed (21) hide show

.gitignore +3 -0
InfiniDepth/model/block/pe.py +5 -2
InfiniDepth/model/block/perceive_io.py +32 -46
InfiniDepth/model/block/prompt_models/__pycache__/__init__.cpython-310.pyc +0 -0
InfiniDepth/model/block/prompt_models/__pycache__/rope.cpython-310.pyc +0 -0
InfiniDepth/model/block/prompt_models/__pycache__/sam.cpython-310.pyc +0 -0
InfiniDepth/model/block/prompt_models/__pycache__/selfattn.cpython-310.pyc +0 -0
InfiniDepth/model/block/prompt_models/rope.py +6 -2
InfiniDepth/model/block/prompt_models/selfattn.py +6 -2
InfiniDepth/model/block/prompt_models/utils/__pycache__/__init__.cpython-310.pyc +0 -0
InfiniDepth/model/block/prompt_models/utils/__pycache__/pe_utils.cpython-310.pyc +0 -0
InfiniDepth/model/block/prompt_models/utils/__pycache__/transformer.cpython-310.pyc +0 -0
InfiniDepth/model/block/utils.py +1 -1
InfiniDepth/model/model.py +5 -1
InfiniDepth/utils/__pycache__/hf_demo_utils.cpython-310.pyc +0 -0
InfiniDepth/utils/hf_demo_utils.py +29 -33
InfiniDepth/utils/io_utils.py +33 -3
InfiniDepth/utils/sampling_utils.py +1 -1
__pycache__/app.cpython-310.pyc +0 -0
app.py +22 -4
requirements.txt +1 -0

.gitignore ADDED Viewed

@@ -0,0 +1,3 @@

+checkpoints
+__pycache__
+.pyc

InfiniDepth/model/block/pe.py CHANGED Viewed

@@ -7,7 +7,11 @@ import torch.nn as nn
 import torch.nn.functional as F
 from typing import Any, Optional, Tuple, Dict
-acc_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 POS_EMB_REGISTRY = {}
@@ -217,4 +221,3 @@ def build_pos_emb(pos_emb_type="nerf", **kwargs):

 import torch.nn.functional as F
 from typing import Any, Optional, Tuple, Dict
+acc_dtype = (
+    torch.bfloat16
+    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
+    else torch.float16
+)
 POS_EMB_REGISTRY = {}

InfiniDepth/model/block/perceive_io.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 from einops import rearrange
 from ...utils.logger import Log
 from torch import Tensor, nn
 try:
     from xformers.ops import memory_efficient_attention, unbind
@@ -97,42 +98,6 @@ class MemEffCrossAttention(CrossAttention):
         return x
-# class Attention(nn.Module):
-#     def __init__(
-#         self,
-#         dim: int,
-#         num_heads: int = 8,
-#         qkv_bias: bool = False,
-#         proj_bias: bool = True,
-#         attn_drop: float = 0.0,
-#         proj_drop: float = 0.0,
-#     ) -> None:
-#         super().__init__()
-#         self.num_heads = num_heads
-#         head_dim = dim // num_heads
-#         self.scale = head_dim**-0.5
-#         self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
-#         self.attn_drop = nn.Dropout(attn_drop)
-#         self.proj = nn.Linear(dim, dim, bias=proj_bias)
-#         self.proj_drop = nn.Dropout(proj_drop)
-#     def forward(self, x: Tensor) -> Tensor:
-#         B, N, C = x.shape
-#         qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
-#         q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]
-#         attn = q @ k.transpose(-2, -1)
-#         attn = attn.softmax(dim=-1)
-#         attn = self.attn_drop(attn)
-#         x = (attn @ v).transpose(1, 2).reshape(B, N, C)
-#         x = self.proj(x)
-#         x = self.proj_drop(x)
-#         return x
 class Attention(nn.Module):
     def __init__(
         self,
@@ -142,7 +107,7 @@ class Attention(nn.Module):
         proj_bias: bool = True,
         attn_drop: float = 0.0,
         proj_drop: float = 0.0,
-        pe: str = "normal",
     ) -> None:
         super().__init__()
         self.num_heads = num_heads
@@ -160,20 +125,41 @@ class Attention(nn.Module):
     def forward(self, x: Tensor, x_pe: Tensor = None) -> Tensor:
         B, N, C = x.shape
-        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
-        q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]
         if self.pe == "qk":
             q = self.norm1(q + x_pe)
             k = self.norm2(k + x_pe)
         elif self.pe == "apply":
             pass
-        attn = q @ k.transpose(-2, -1)
-        attn = attn.softmax(dim=-1)
-        attn = self.attn_drop(attn)
-        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
         x = self.proj(x)
         x = self.proj_drop(x)
         return x
@@ -188,7 +174,7 @@ class MemEffAttention(Attention):
     ) -> Tensor:
         if not XFORMERS_AVAILABLE:
             assert attn_bias is None, "xFormers is required for nested tensors usage"
-            return super().forward(x)
         B, N, C = x.shape
         qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)

 from einops import rearrange
 from ...utils.logger import Log
 from torch import Tensor, nn
+import torch.nn.functional as F
 try:
     from xformers.ops import memory_efficient_attention, unbind
         return x
 class Attention(nn.Module):
     def __init__(
         self,
         proj_bias: bool = True,
         attn_drop: float = 0.0,
         proj_drop: float = 0.0,
+        pe: str = "qk",
     ) -> None:
         super().__init__()
         self.num_heads = num_heads
     def forward(self, x: Tensor, x_pe: Tensor = None) -> Tensor:
         B, N, C = x.shape
+        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
+        q, k, v = qkv.unbind(dim=2)
         if self.pe == "qk":
+            if x_pe is None:
+                raise ValueError("x_pe must be provided when pe='qk'")
+            x_pe = rearrange(x_pe, "b n (m c) -> b n m c", m=self.num_heads)
             q = self.norm1(q + x_pe)
             k = self.norm2(k + x_pe)
         elif self.pe == "apply":
             pass
+        # Keep behavior aligned with MemEffAttention when norm changes q/k dtype.
+        q = q.to(dtype=v.dtype)
+        k = k.to(dtype=v.dtype)
+        q = q.transpose(1, 2)
+        k = k.transpose(1, 2)
+        v = v.transpose(1, 2)
+        # Use SDPA and run attention kernel in float32 under mixed precision for better numerical stability.
+        if q.dtype in (torch.float16, torch.bfloat16):
+            q_attn = q.float()
+            k_attn = k.float()
+            v_attn = v.float()
+        else:
+            q_attn = q
+            k_attn = k
+            v_attn = v
+        x = F.scaled_dot_product_attention(
+            q_attn,
+            k_attn,
+            v_attn,
+            dropout_p=self.attn_drop.p if self.training else 0.0,
+        )
+        x = x.to(dtype=v.dtype).transpose(1, 2).reshape(B, N, C)
         x = self.proj(x)
         x = self.proj_drop(x)
         return x
     ) -> Tensor:
         if not XFORMERS_AVAILABLE:
             assert attn_bias is None, "xFormers is required for nested tensors usage"
+            return super().forward(x, x_pe)
         B, N, C = x.shape
         qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)

InfiniDepth/model/block/prompt_models/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/InfiniDepth/model/block/prompt_models/__pycache__/__init__.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/__pycache__/__init__.cpython-310.pyc differ

InfiniDepth/model/block/prompt_models/__pycache__/rope.cpython-310.pyc CHANGED Viewed

Binary files a/InfiniDepth/model/block/prompt_models/__pycache__/rope.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/__pycache__/rope.cpython-310.pyc differ

InfiniDepth/model/block/prompt_models/__pycache__/sam.cpython-310.pyc CHANGED Viewed

Binary files a/InfiniDepth/model/block/prompt_models/__pycache__/sam.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/__pycache__/sam.cpython-310.pyc differ

InfiniDepth/model/block/prompt_models/__pycache__/selfattn.cpython-310.pyc CHANGED Viewed

Binary files a/InfiniDepth/model/block/prompt_models/__pycache__/selfattn.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/__pycache__/selfattn.cpython-310.pyc differ

InfiniDepth/model/block/prompt_models/rope.py CHANGED Viewed

@@ -3,7 +3,11 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
-acc_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 class PositionGetter:
@@ -209,4 +213,4 @@ class RotaryPositionEmbedding2D(nn.Module):
         )
     def forward(self, size: Tuple[int, int], device: torch.device) -> torch.Tensor:
-        return self.forward_encoding(size, device)

 import torch.nn as nn
 import torch.nn.functional as F
+acc_dtype = (
+    torch.bfloat16
+    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
+    else torch.float16
+)
 class PositionGetter:
         )
     def forward(self, size: Tuple[int, int], device: torch.device) -> torch.Tensor:
+        return self.forward_encoding(size, device)

InfiniDepth/model/block/prompt_models/selfattn.py CHANGED Viewed

@@ -6,7 +6,11 @@ from .rope import RotaryPositionEmbedding2D
 from .utils.pe_utils import PositionEmbeddingRandom
 from torch import Tensor
-acc_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 class SelfAttnPromptModel(nn.Module):
@@ -283,4 +287,4 @@ class SelfAttenPromptBlock(nn.Module):
         else:
             context = x[:, x_len : x_len + context_len, :]
-        return query, context

 from .utils.pe_utils import PositionEmbeddingRandom
 from torch import Tensor
+acc_dtype = (
+    torch.bfloat16
+    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
+    else torch.float16
+)
 class SelfAttnPromptModel(nn.Module):
         else:
             context = x[:, x_len : x_len + context_len, :]
+        return query, context

InfiniDepth/model/block/prompt_models/utils/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/InfiniDepth/model/block/prompt_models/utils/__pycache__/__init__.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/utils/__pycache__/__init__.cpython-310.pyc differ

InfiniDepth/model/block/prompt_models/utils/__pycache__/pe_utils.cpython-310.pyc CHANGED Viewed

Binary files a/InfiniDepth/model/block/prompt_models/utils/__pycache__/pe_utils.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/utils/__pycache__/pe_utils.cpython-310.pyc differ

InfiniDepth/model/block/prompt_models/utils/__pycache__/transformer.cpython-310.pyc CHANGED Viewed

Binary files a/InfiniDepth/model/block/prompt_models/utils/__pycache__/transformer.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/utils/__pycache__/transformer.cpython-310.pyc differ

InfiniDepth/model/block/utils.py CHANGED Viewed

@@ -111,7 +111,7 @@ def make_coord(shape, ranges=None, flatten=True):
         r = (v1 - v0) / (2 * n)
         seq = v0 + r + (2 * r) * torch.arange(n).float()
         coord_seqs.append(seq)
-    ret = torch.stack(torch.meshgrid(*coord_seqs), dim=-1)
     if flatten:
         ret = ret.view(-1, ret.shape[-1])
     return ret

         r = (v1 - v0) / (2 * n)
         seq = v0 + r + (2 * r) * torch.arange(n).float()
         coord_seqs.append(seq)
+    ret = torch.stack(torch.meshgrid(*coord_seqs, indexing="ij"), dim=-1)
     if flatten:
         ret = ret.view(-1, ret.shape[-1])
     return ret

InfiniDepth/model/model.py CHANGED Viewed

@@ -17,7 +17,11 @@ from .block.prompt_models import GeneralPromptModel, SelfAttnPromptModel
 from .block.implicit_decoder import ImplicitHead
 from .block.convolution import BasicEncoder
-acc_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 def _resolve_local_dinov3_repo() -> str:

 from .block.implicit_decoder import ImplicitHead
 from .block.convolution import BasicEncoder
+acc_dtype = (
+    torch.bfloat16
+    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
+    else torch.float16
+)
 def _resolve_local_dinov3_repo() -> str:

InfiniDepth/utils/__pycache__/hf_demo_utils.cpython-310.pyc CHANGED Viewed

Binary files a/InfiniDepth/utils/__pycache__/hf_demo_utils.cpython-310.pyc and b/InfiniDepth/utils/__pycache__/hf_demo_utils.cpython-310.pyc differ

InfiniDepth/utils/hf_demo_utils.py CHANGED Viewed

@@ -16,16 +16,17 @@ from .inference_utils import (
     resolve_camera_intrinsics,
     resolve_output_size_from_mode,
 )
-from .io_utils import depth2pcd
 from .model_utils import build_model
-from .sampling_utils import make_2d_uniform_coord, make_3d_uniform_coord_triangle
 from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
 DEFAULT_MODEL_PATHS = {
-    "InfiniDepth": "checkpoints/depth/InfiniDepth.ckpt",
-    "InfiniDepth_DC": "checkpoints/depth/InfiniDepth_DC.ckpt",
 }
 DEFAULT_MOGE2_PRETRAINED = os.getenv("INFINIDEPTH_MOGE2_PRETRAINED", "Ruicheng/moge-2-vitl-normal")
@@ -144,6 +145,7 @@ def _build_and_save_point_cloud(
     cx: float,
     cy: float,
     output_path: str,
 ) -> tuple[np.ndarray, np.ndarray]:
     ixt = np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=np.float32)
     pcd = depth2pcd(
@@ -164,7 +166,14 @@ def _build_and_save_point_cloud(
     if not o3d.io.write_point_cloud(output_path, pcd):
         raise RuntimeError(f"Failed to save point cloud to {output_path}")
-    return np.asarray(pcd.points), np.asarray(pcd.colors)
 def resolve_checkpoint_path(model_type: str) -> str:
@@ -233,12 +242,18 @@ def run_single_image_demo(
         upsample_ratio=upsample_ratio,
     )
     gt_depth, prompt_depth, gt_depth_mask, prompt_mask, depth_source_label = _resolve_depth_inputs(
         depth_path=depth_path,
         input_size=input_size,
         image=image,
         device=device,
     )
     ckpt_path = resolve_checkpoint_path(model_type)
     model_cache = model_cache or ModelCache()
@@ -248,10 +263,10 @@ def run_single_image_demo(
     pred_depth, _ = model.inference(
         image=image,
         query_coord=query_2d_uniform_coord,
-        gt_depth=gt_depth,
         gt_depth_mask=gt_depth_mask,
-        prompt_depth=prompt_depth if model_type == "InfiniDepth_DC" else None,
-        prompt_mask=prompt_mask if model_type == "InfiniDepth_DC" else None,
     )
     pred_depthmap = pred_depth.permute(0, 2, 1).reshape(1, 1, h_out, w_out)
@@ -265,45 +280,26 @@ def run_single_image_demo(
         cy_org=cy,
         org_h=org_h,
         org_w=org_w,
-        h=h_out,
-        w=w_out,
         device=device,
     )
-    rgb_out_np = _resize_rgb_image(image_np, (h_out, w_out))
-    rgb_out_tensor = _image_tensor_from_numpy(rgb_out_np, device)
-    query_3d_uniform_coord = make_3d_uniform_coord_triangle(
-        depth_hw=pred_depthmap[0, 0],
-        fx=fx_out,
-        fy=fy_out,
-        cx=cx_out,
-        cy=cy_out,
-        N=int(max_points_preview),
-        deterministic=True,
-    ).unsqueeze(0)
-    pred_depth_3d, _ = model.inference(
-        image=image,
-        query_coord=query_3d_uniform_coord,
-        gt_depth=gt_depth,
-        gt_depth_mask=gt_depth_mask,
-        prompt_depth=prompt_depth if model_type == "InfiniDepth_DC" else None,
-        prompt_mask=prompt_mask if model_type == "InfiniDepth_DC" else None,
-    )
     output_dir = tempfile.mkdtemp(prefix="infinidepth_demo_")
     ply_path = os.path.join(output_dir, f"{model_type}_point_cloud.ply")
     depth_npy_path = os.path.join(output_dir, f"{model_type}_depth.npy")
     np.save(depth_npy_path, pred_depthmap[0, 0].detach().cpu().numpy().astype(np.float32))
     xyz, rgb = _build_and_save_point_cloud(
-        query_coord=query_3d_uniform_coord,
-        pred_depth=pred_depth_3d,
-        rgb_image=rgb_out_tensor,
         fx=float(fx_out),
         fy=float(fy_out),
         cx=float(cx_out),
         cy=float(cy_out),
         output_path=ply_path,
     )
     return DemoResult(

     resolve_camera_intrinsics,
     resolve_output_size_from_mode,
 )
+from .io_utils import depth2pcd, depth_to_disparity
 from .model_utils import build_model
+from .sampling_utils import make_2d_uniform_coord
 from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
 DEFAULT_MODEL_PATHS = {
+    "InfiniDepth": "checkpoints/depth/infinidepth.ckpt",
+    "InfiniDepth_DC": "checkpoints/depth/infinidepth_dc.ckpt",
 }
+os.environ.setdefault("INFINIDEPTH_MOGE2_PRETRAINED", "checkpoints/depth/moge2.pt")
 DEFAULT_MOGE2_PRETRAINED = os.getenv("INFINIDEPTH_MOGE2_PRETRAINED", "Ruicheng/moge-2-vitl-normal")
     cx: float,
     cy: float,
     output_path: str,
+    max_points_preview: int,
 ) -> tuple[np.ndarray, np.ndarray]:
     ixt = np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=np.float32)
     pcd = depth2pcd(
     if not o3d.io.write_point_cloud(output_path, pcd):
         raise RuntimeError(f"Failed to save point cloud to {output_path}")
+    xyz = np.asarray(pcd.points)
+    rgb = np.asarray(pcd.colors)
+    if xyz.shape[0] > max_points_preview:
+        index = np.random.choice(xyz.shape[0], int(max_points_preview), replace=False)
+        xyz = xyz[index]
+        rgb = rgb[index]
+    return xyz, rgb
 def resolve_checkpoint_path(model_type: str) -> str:
         upsample_ratio=upsample_ratio,
     )
+    if model_type == "InfiniDepth_DC":
+        assert depth_path is not None and os.path.exists(depth_path), "InfiniDepth_DC requires a valid input depth map for depth completion. Please provide --input_depth_path."
     gt_depth, prompt_depth, gt_depth_mask, prompt_mask, depth_source_label = _resolve_depth_inputs(
         depth_path=depth_path,
         input_size=input_size,
         image=image,
         device=device,
     )
+    gt = depth_to_disparity(gt_depth)
+    prompt = depth_to_disparity(prompt_depth)
+    prompt_mask = prompt > 0
     ckpt_path = resolve_checkpoint_path(model_type)
     model_cache = model_cache or ModelCache()
     pred_depth, _ = model.inference(
         image=image,
         query_coord=query_2d_uniform_coord,
+        gt_depth=gt,
         gt_depth_mask=gt_depth_mask,
+        prompt_depth=prompt,
+        prompt_mask=prompt_mask,
     )
     pred_depthmap = pred_depth.permute(0, 2, 1).reshape(1, 1, h_out, w_out)
         cy_org=cy,
         org_h=org_h,
         org_w=org_w,
+        h=h_in,
+        w=w_in,
         device=device,
     )
     output_dir = tempfile.mkdtemp(prefix="infinidepth_demo_")
     ply_path = os.path.join(output_dir, f"{model_type}_point_cloud.ply")
     depth_npy_path = os.path.join(output_dir, f"{model_type}_depth.npy")
     np.save(depth_npy_path, pred_depthmap[0, 0].detach().cpu().numpy().astype(np.float32))
     xyz, rgb = _build_and_save_point_cloud(
+        query_coord=query_2d_uniform_coord,
+        pred_depth=pred_depth,
+        rgb_image=image,
         fx=float(fx_out),
         fy=float(fy_out),
         cx=float(cx_out),
         cy=float(cy_out),
         output_path=ply_path,
+        max_points_preview=int(max_points_preview),
     )
     return DemoResult(

InfiniDepth/utils/io_utils.py CHANGED Viewed

@@ -377,6 +377,24 @@ def depth2pcd(
 ):
     device = sampled_coord.device if torch.is_tensor(sampled_coord) else torch.device("cpu")
     if rgb_image.ndim == 3 and rgb_image.shape[0] == 3:
         rgb_image = rgb_image.unsqueeze(0)
     elif rgb_image.ndim == 3 and rgb_image.shape[2] == 3:
@@ -398,7 +416,7 @@ def depth2pcd(
     p_y = ((sampled_coord[:, 0] + 1) * (rgb_image.shape[2] / 2.0)) - 0.5  # h --> y
     ones = torch.ones_like(p_x)
-    cam_coords = torch.stack([p_x, p_y, ones], dim=1) * sampled_depth.unsqueeze(1)
     cam_coords = cam_coords @ torch.inverse(torch.from_numpy(ixt).float().to(device)).T
     if ext is not None:
@@ -420,9 +438,21 @@ def depth2pcd(
         if clip_box[4] is not None: mask &= cam_coords[:, 1] >= clip_box[4]
         if clip_box[5] is not None: mask &= cam_coords[:, 2] >= clip_box[5]
     pcd = o3d.geometry.PointCloud()
-    pcd.points = o3d.utility.Vector3dVector(cam_coords[mask, :3].cpu().numpy())
-    pcd.colors = o3d.utility.Vector3dVector(sampled_rgb[mask].cpu().numpy())
     if ret_mask:
         return pcd, mask

 ):
     device = sampled_coord.device if torch.is_tensor(sampled_coord) else torch.device("cpu")
+    if not torch.is_tensor(sampled_coord):
+        sampled_coord = torch.as_tensor(sampled_coord, dtype=torch.float32, device=device)
+    else:
+        sampled_coord = sampled_coord.to(device=device, dtype=torch.float32)
+    if not torch.is_tensor(sampled_depth):
+        sampled_depth = torch.as_tensor(sampled_depth, dtype=torch.float32, device=device)
+    else:
+        sampled_depth = sampled_depth.to(device=device, dtype=torch.float32)
+    sampled_coord = sampled_coord.reshape(-1, 2)
+    sampled_depth = sampled_depth.reshape(-1)
+    if sampled_coord.shape[0] != sampled_depth.shape[0]:
+        raise ValueError(
+            f"sampled_coord and sampled_depth must contain the same number of points, "
+            f"got {sampled_coord.shape[0]} and {sampled_depth.shape[0]}"
+        )
     if rgb_image.ndim == 3 and rgb_image.shape[0] == 3:
         rgb_image = rgb_image.unsqueeze(0)
     elif rgb_image.ndim == 3 and rgb_image.shape[2] == 3:
     p_y = ((sampled_coord[:, 0] + 1) * (rgb_image.shape[2] / 2.0)) - 0.5  # h --> y
     ones = torch.ones_like(p_x)
+    cam_coords = torch.stack([p_x, p_y, ones], dim=1) * sampled_depth[:, None]
     cam_coords = cam_coords @ torch.inverse(torch.from_numpy(ixt).float().to(device)).T
     if ext is not None:
         if clip_box[4] is not None: mask &= cam_coords[:, 1] >= clip_box[4]
         if clip_box[5] is not None: mask &= cam_coords[:, 2] >= clip_box[5]
+    points_np = np.ascontiguousarray(
+        cam_coords[mask, :3].detach().cpu().to(torch.float64).numpy()
+    )
+    colors_np = np.ascontiguousarray(
+        sampled_rgb[mask].detach().cpu().to(torch.float64).numpy()
+    )
+    if points_np.ndim != 2 or points_np.shape[1] != 3:
+        raise ValueError(f"Point cloud points must have shape [N, 3], got {points_np.shape}")
+    if colors_np.ndim != 2 or colors_np.shape[1] != 3:
+        raise ValueError(f"Point cloud colors must have shape [N, 3], got {colors_np.shape}")
     pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(points_np)
+    pcd.colors = o3d.utility.Vector3dVector(colors_np)
     if ret_mask:
         return pcd, mask

InfiniDepth/utils/sampling_utils.py CHANGED Viewed

@@ -49,7 +49,7 @@ def make_2d_uniform_coord(shape, ranges=None, flatten=True):
         r = (v1 - v0) / (2 * n)
         seq = v0 + r + (2 * r) * torch.arange(n).float()
         coord_seqs.append(seq)
-    query_coords = torch.stack(torch.meshgrid(*coord_seqs), dim=-1)
     if flatten:
         query_coords = query_coords.view(-1, query_coords.shape[-1])
     return query_coords

         r = (v1 - v0) / (2 * n)
         seq = v0 + r + (2 * r) * torch.arange(n).float()
         coord_seqs.append(seq)
+    query_coords = torch.stack(torch.meshgrid(*coord_seqs, indexing="ij"), dim=-1)
     if flatten:
         query_coords = query_coords.view(-1, query_coords.shape[-1])
     return query_coords

__pycache__/app.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -66,6 +66,23 @@ CUSTOM_CSS = """
 """
 def _none_if_invalid(value: Optional[float]) -> Optional[float]:
     if value is None:
         return None
@@ -195,7 +212,7 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS)
                     label="Model Type",
                 )
                 input_size = gr.Dropdown(
-                    choices=["504x672", "768x1024"],
                     value="768x1024",
                     label="Inference Resolution (HxW)",
                 )
@@ -212,8 +229,8 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS)
                     label="Super-resolution Ratio",
                 )
                 max_points_preview = gr.Slider(
-                    minimum=5000,
-                    maximum=120000,
                     value=60000,
                     step=5000,
                     label="Max Preview Points",
@@ -275,4 +292,5 @@ demo = demo.queue()
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))

 """
+def _ensure_localhost_bypasses_proxy() -> None:
+    localhost_hosts = ("localhost", "127.0.0.1", "::1")
+    for env_key in ("NO_PROXY", "no_proxy"):
+        current = os.environ.get(env_key, "")
+        items = [item.strip() for item in current.split(",") if item.strip()]
+        changed = False
+        for host in localhost_hosts:
+            if host not in items:
+                items.append(host)
+                changed = True
+        if changed or current:
+            os.environ[env_key] = ",".join(items)
+_ensure_localhost_bypasses_proxy()
 def _none_if_invalid(value: Optional[float]) -> Optional[float]:
     if value is None:
         return None
                     label="Model Type",
                 )
                 input_size = gr.Dropdown(
+                    choices=["512x672", "768x1024"],
                     value="768x1024",
                     label="Inference Resolution (HxW)",
                 )
                     label="Super-resolution Ratio",
                 )
                 max_points_preview = gr.Slider(
+                    minimum=10000,
+                    maximum=1000000,
                     value=60000,
                     step=5000,
                     label="Max Preview Points",
 if __name__ == "__main__":
+    server_name = "0.0.0.0" if os.getenv("SPACE_ID") else "127.0.0.1"
+    demo.launch(server_name=server_name, server_port=int(os.getenv("PORT", "7861")))

requirements.txt CHANGED Viewed

@@ -4,6 +4,7 @@
 torch==2.9.1
 torchvision==0.24.1
 torchaudio==2.9.1
 hydra-colorlog
 hydra-core
 h5py

 torch==2.9.1
 torchvision==0.24.1
 torchaudio==2.9.1
+xformers==0.0.33.post1
 hydra-colorlog
 hydra-core
 h5py