ritianyu committed on
Commit
b6640e8
·
1 Parent(s): 5e71ded
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ checkpoints
2
+ __pycache__
3
+ .pyc
InfiniDepth/model/block/pe.py CHANGED
@@ -7,7 +7,11 @@ import torch.nn as nn
7
  import torch.nn.functional as F
8
  from typing import Any, Optional, Tuple, Dict
9
 
10
- acc_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 
 
 
 
11
 
12
  POS_EMB_REGISTRY = {}
13
 
@@ -217,4 +221,3 @@ def build_pos_emb(pos_emb_type="nerf", **kwargs):
217
 
218
 
219
 
220
-
 
7
  import torch.nn.functional as F
8
  from typing import Any, Optional, Tuple, Dict
9
 
10
+ acc_dtype = (
11
+ torch.bfloat16
12
+ if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
13
+ else torch.float16
14
+ )
15
 
16
  POS_EMB_REGISTRY = {}
17
 
 
221
 
222
 
223
 
 
InfiniDepth/model/block/perceive_io.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  from einops import rearrange
4
  from ...utils.logger import Log
5
  from torch import Tensor, nn
 
6
 
7
  try:
8
  from xformers.ops import memory_efficient_attention, unbind
@@ -97,42 +98,6 @@ class MemEffCrossAttention(CrossAttention):
97
  return x
98
 
99
 
100
- # class Attention(nn.Module):
101
- # def __init__(
102
- # self,
103
- # dim: int,
104
- # num_heads: int = 8,
105
- # qkv_bias: bool = False,
106
- # proj_bias: bool = True,
107
- # attn_drop: float = 0.0,
108
- # proj_drop: float = 0.0,
109
- # ) -> None:
110
- # super().__init__()
111
- # self.num_heads = num_heads
112
- # head_dim = dim // num_heads
113
- # self.scale = head_dim**-0.5
114
-
115
- # self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
116
- # self.attn_drop = nn.Dropout(attn_drop)
117
- # self.proj = nn.Linear(dim, dim, bias=proj_bias)
118
- # self.proj_drop = nn.Dropout(proj_drop)
119
-
120
- # def forward(self, x: Tensor) -> Tensor:
121
- # B, N, C = x.shape
122
- # qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
123
-
124
- # q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]
125
- # attn = q @ k.transpose(-2, -1)
126
-
127
- # attn = attn.softmax(dim=-1)
128
- # attn = self.attn_drop(attn)
129
-
130
- # x = (attn @ v).transpose(1, 2).reshape(B, N, C)
131
- # x = self.proj(x)
132
- # x = self.proj_drop(x)
133
- # return x
134
-
135
-
136
  class Attention(nn.Module):
137
  def __init__(
138
  self,
@@ -142,7 +107,7 @@ class Attention(nn.Module):
142
  proj_bias: bool = True,
143
  attn_drop: float = 0.0,
144
  proj_drop: float = 0.0,
145
- pe: str = "normal",
146
  ) -> None:
147
  super().__init__()
148
  self.num_heads = num_heads
@@ -160,20 +125,41 @@ class Attention(nn.Module):
160
 
161
  def forward(self, x: Tensor, x_pe: Tensor = None) -> Tensor:
162
  B, N, C = x.shape
163
- qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
164
-
165
- q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]
166
  if self.pe == "qk":
 
 
 
167
  q = self.norm1(q + x_pe)
168
  k = self.norm2(k + x_pe)
169
  elif self.pe == "apply":
170
  pass
171
- attn = q @ k.transpose(-2, -1)
172
 
173
- attn = attn.softmax(dim=-1)
174
- attn = self.attn_drop(attn)
175
-
176
- x = (attn @ v).transpose(1, 2).reshape(B, N, C)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  x = self.proj(x)
178
  x = self.proj_drop(x)
179
  return x
@@ -188,7 +174,7 @@ class MemEffAttention(Attention):
188
  ) -> Tensor:
189
  if not XFORMERS_AVAILABLE:
190
  assert attn_bias is None, "xFormers is required for nested tensors usage"
191
- return super().forward(x)
192
 
193
  B, N, C = x.shape
194
  qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
 
3
  from einops import rearrange
4
  from ...utils.logger import Log
5
  from torch import Tensor, nn
6
+ import torch.nn.functional as F
7
 
8
  try:
9
  from xformers.ops import memory_efficient_attention, unbind
 
98
  return x
99
 
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  class Attention(nn.Module):
102
  def __init__(
103
  self,
 
107
  proj_bias: bool = True,
108
  attn_drop: float = 0.0,
109
  proj_drop: float = 0.0,
110
+ pe: str = "qk",
111
  ) -> None:
112
  super().__init__()
113
  self.num_heads = num_heads
 
125
 
126
  def forward(self, x: Tensor, x_pe: Tensor = None) -> Tensor:
127
  B, N, C = x.shape
128
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
129
+ q, k, v = qkv.unbind(dim=2)
 
130
  if self.pe == "qk":
131
+ if x_pe is None:
132
+ raise ValueError("x_pe must be provided when pe='qk'")
133
+ x_pe = rearrange(x_pe, "b n (m c) -> b n m c", m=self.num_heads)
134
  q = self.norm1(q + x_pe)
135
  k = self.norm2(k + x_pe)
136
  elif self.pe == "apply":
137
  pass
 
138
 
139
+ # Keep behavior aligned with MemEffAttention when norm changes q/k dtype.
140
+ q = q.to(dtype=v.dtype)
141
+ k = k.to(dtype=v.dtype)
142
+
143
+ q = q.transpose(1, 2)
144
+ k = k.transpose(1, 2)
145
+ v = v.transpose(1, 2)
146
+
147
+ # Use SDPA and run attention kernel in float32 under mixed precision for better numerical stability.
148
+ if q.dtype in (torch.float16, torch.bfloat16):
149
+ q_attn = q.float()
150
+ k_attn = k.float()
151
+ v_attn = v.float()
152
+ else:
153
+ q_attn = q
154
+ k_attn = k
155
+ v_attn = v
156
+ x = F.scaled_dot_product_attention(
157
+ q_attn,
158
+ k_attn,
159
+ v_attn,
160
+ dropout_p=self.attn_drop.p if self.training else 0.0,
161
+ )
162
+ x = x.to(dtype=v.dtype).transpose(1, 2).reshape(B, N, C)
163
  x = self.proj(x)
164
  x = self.proj_drop(x)
165
  return x
 
174
  ) -> Tensor:
175
  if not XFORMERS_AVAILABLE:
176
  assert attn_bias is None, "xFormers is required for nested tensors usage"
177
+ return super().forward(x, x_pe)
178
 
179
  B, N, C = x.shape
180
  qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
InfiniDepth/model/block/prompt_models/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/InfiniDepth/model/block/prompt_models/__pycache__/__init__.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/__pycache__/__init__.cpython-310.pyc differ
 
InfiniDepth/model/block/prompt_models/__pycache__/rope.cpython-310.pyc CHANGED
Binary files a/InfiniDepth/model/block/prompt_models/__pycache__/rope.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/__pycache__/rope.cpython-310.pyc differ
 
InfiniDepth/model/block/prompt_models/__pycache__/sam.cpython-310.pyc CHANGED
Binary files a/InfiniDepth/model/block/prompt_models/__pycache__/sam.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/__pycache__/sam.cpython-310.pyc differ
 
InfiniDepth/model/block/prompt_models/__pycache__/selfattn.cpython-310.pyc CHANGED
Binary files a/InfiniDepth/model/block/prompt_models/__pycache__/selfattn.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/__pycache__/selfattn.cpython-310.pyc differ
 
InfiniDepth/model/block/prompt_models/rope.py CHANGED
@@ -3,7 +3,11 @@ import torch
3
  import torch.nn as nn
4
  import torch.nn.functional as F
5
 
6
- acc_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 
 
 
 
7
 
8
 
9
  class PositionGetter:
@@ -209,4 +213,4 @@ class RotaryPositionEmbedding2D(nn.Module):
209
  )
210
 
211
  def forward(self, size: Tuple[int, int], device: torch.device) -> torch.Tensor:
212
- return self.forward_encoding(size, device)
 
3
  import torch.nn as nn
4
  import torch.nn.functional as F
5
 
6
+ acc_dtype = (
7
+ torch.bfloat16
8
+ if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
9
+ else torch.float16
10
+ )
11
 
12
 
13
  class PositionGetter:
 
213
  )
214
 
215
  def forward(self, size: Tuple[int, int], device: torch.device) -> torch.Tensor:
216
+ return self.forward_encoding(size, device)
InfiniDepth/model/block/prompt_models/selfattn.py CHANGED
@@ -6,7 +6,11 @@ from .rope import RotaryPositionEmbedding2D
6
  from .utils.pe_utils import PositionEmbeddingRandom
7
  from torch import Tensor
8
 
9
- acc_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 
 
 
 
10
 
11
 
12
  class SelfAttnPromptModel(nn.Module):
@@ -283,4 +287,4 @@ class SelfAttenPromptBlock(nn.Module):
283
  else:
284
  context = x[:, x_len : x_len + context_len, :]
285
 
286
- return query, context
 
6
  from .utils.pe_utils import PositionEmbeddingRandom
7
  from torch import Tensor
8
 
9
+ acc_dtype = (
10
+ torch.bfloat16
11
+ if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
12
+ else torch.float16
13
+ )
14
 
15
 
16
  class SelfAttnPromptModel(nn.Module):
 
287
  else:
288
  context = x[:, x_len : x_len + context_len, :]
289
 
290
+ return query, context
InfiniDepth/model/block/prompt_models/utils/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/InfiniDepth/model/block/prompt_models/utils/__pycache__/__init__.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/utils/__pycache__/__init__.cpython-310.pyc differ
 
InfiniDepth/model/block/prompt_models/utils/__pycache__/pe_utils.cpython-310.pyc CHANGED
Binary files a/InfiniDepth/model/block/prompt_models/utils/__pycache__/pe_utils.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/utils/__pycache__/pe_utils.cpython-310.pyc differ
 
InfiniDepth/model/block/prompt_models/utils/__pycache__/transformer.cpython-310.pyc CHANGED
Binary files a/InfiniDepth/model/block/prompt_models/utils/__pycache__/transformer.cpython-310.pyc and b/InfiniDepth/model/block/prompt_models/utils/__pycache__/transformer.cpython-310.pyc differ
 
InfiniDepth/model/block/utils.py CHANGED
@@ -111,7 +111,7 @@ def make_coord(shape, ranges=None, flatten=True):
111
  r = (v1 - v0) / (2 * n)
112
  seq = v0 + r + (2 * r) * torch.arange(n).float()
113
  coord_seqs.append(seq)
114
- ret = torch.stack(torch.meshgrid(*coord_seqs), dim=-1)
115
  if flatten:
116
  ret = ret.view(-1, ret.shape[-1])
117
  return ret
 
111
  r = (v1 - v0) / (2 * n)
112
  seq = v0 + r + (2 * r) * torch.arange(n).float()
113
  coord_seqs.append(seq)
114
+ ret = torch.stack(torch.meshgrid(*coord_seqs, indexing="ij"), dim=-1)
115
  if flatten:
116
  ret = ret.view(-1, ret.shape[-1])
117
  return ret
InfiniDepth/model/model.py CHANGED
@@ -17,7 +17,11 @@ from .block.prompt_models import GeneralPromptModel, SelfAttnPromptModel
17
  from .block.implicit_decoder import ImplicitHead
18
  from .block.convolution import BasicEncoder
19
 
20
- acc_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
 
 
 
 
21
 
22
 
23
  def _resolve_local_dinov3_repo() -> str:
 
17
  from .block.implicit_decoder import ImplicitHead
18
  from .block.convolution import BasicEncoder
19
 
20
+ acc_dtype = (
21
+ torch.bfloat16
22
+ if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
23
+ else torch.float16
24
+ )
25
 
26
 
27
  def _resolve_local_dinov3_repo() -> str:
InfiniDepth/utils/__pycache__/hf_demo_utils.cpython-310.pyc CHANGED
Binary files a/InfiniDepth/utils/__pycache__/hf_demo_utils.cpython-310.pyc and b/InfiniDepth/utils/__pycache__/hf_demo_utils.cpython-310.pyc differ
 
InfiniDepth/utils/hf_demo_utils.py CHANGED
@@ -16,16 +16,17 @@ from .inference_utils import (
16
  resolve_camera_intrinsics,
17
  resolve_output_size_from_mode,
18
  )
19
- from .io_utils import depth2pcd
20
  from .model_utils import build_model
21
- from .sampling_utils import make_2d_uniform_coord, make_3d_uniform_coord_triangle
22
  from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
23
 
24
 
25
  DEFAULT_MODEL_PATHS = {
26
- "InfiniDepth": "checkpoints/depth/InfiniDepth.ckpt",
27
- "InfiniDepth_DC": "checkpoints/depth/InfiniDepth_DC.ckpt",
28
  }
 
29
  DEFAULT_MOGE2_PRETRAINED = os.getenv("INFINIDEPTH_MOGE2_PRETRAINED", "Ruicheng/moge-2-vitl-normal")
30
 
31
 
@@ -144,6 +145,7 @@ def _build_and_save_point_cloud(
144
  cx: float,
145
  cy: float,
146
  output_path: str,
 
147
  ) -> tuple[np.ndarray, np.ndarray]:
148
  ixt = np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=np.float32)
149
  pcd = depth2pcd(
@@ -164,7 +166,14 @@ def _build_and_save_point_cloud(
164
  if not o3d.io.write_point_cloud(output_path, pcd):
165
  raise RuntimeError(f"Failed to save point cloud to {output_path}")
166
 
167
- return np.asarray(pcd.points), np.asarray(pcd.colors)
 
 
 
 
 
 
 
168
 
169
 
170
  def resolve_checkpoint_path(model_type: str) -> str:
@@ -233,12 +242,18 @@ def run_single_image_demo(
233
  upsample_ratio=upsample_ratio,
234
  )
235
 
 
 
 
236
  gt_depth, prompt_depth, gt_depth_mask, prompt_mask, depth_source_label = _resolve_depth_inputs(
237
  depth_path=depth_path,
238
  input_size=input_size,
239
  image=image,
240
  device=device,
241
  )
 
 
 
242
 
243
  ckpt_path = resolve_checkpoint_path(model_type)
244
  model_cache = model_cache or ModelCache()
@@ -248,10 +263,10 @@ def run_single_image_demo(
248
  pred_depth, _ = model.inference(
249
  image=image,
250
  query_coord=query_2d_uniform_coord,
251
- gt_depth=gt_depth,
252
  gt_depth_mask=gt_depth_mask,
253
- prompt_depth=prompt_depth if model_type == "InfiniDepth_DC" else None,
254
- prompt_mask=prompt_mask if model_type == "InfiniDepth_DC" else None,
255
  )
256
 
257
  pred_depthmap = pred_depth.permute(0, 2, 1).reshape(1, 1, h_out, w_out)
@@ -265,45 +280,26 @@ def run_single_image_demo(
265
  cy_org=cy,
266
  org_h=org_h,
267
  org_w=org_w,
268
- h=h_out,
269
- w=w_out,
270
  device=device,
271
  )
272
 
273
- rgb_out_np = _resize_rgb_image(image_np, (h_out, w_out))
274
- rgb_out_tensor = _image_tensor_from_numpy(rgb_out_np, device)
275
- query_3d_uniform_coord = make_3d_uniform_coord_triangle(
276
- depth_hw=pred_depthmap[0, 0],
277
- fx=fx_out,
278
- fy=fy_out,
279
- cx=cx_out,
280
- cy=cy_out,
281
- N=int(max_points_preview),
282
- deterministic=True,
283
- ).unsqueeze(0)
284
- pred_depth_3d, _ = model.inference(
285
- image=image,
286
- query_coord=query_3d_uniform_coord,
287
- gt_depth=gt_depth,
288
- gt_depth_mask=gt_depth_mask,
289
- prompt_depth=prompt_depth if model_type == "InfiniDepth_DC" else None,
290
- prompt_mask=prompt_mask if model_type == "InfiniDepth_DC" else None,
291
- )
292
-
293
  output_dir = tempfile.mkdtemp(prefix="infinidepth_demo_")
294
  ply_path = os.path.join(output_dir, f"{model_type}_point_cloud.ply")
295
  depth_npy_path = os.path.join(output_dir, f"{model_type}_depth.npy")
296
  np.save(depth_npy_path, pred_depthmap[0, 0].detach().cpu().numpy().astype(np.float32))
297
 
298
  xyz, rgb = _build_and_save_point_cloud(
299
- query_coord=query_3d_uniform_coord,
300
- pred_depth=pred_depth_3d,
301
- rgb_image=rgb_out_tensor,
302
  fx=float(fx_out),
303
  fy=float(fy_out),
304
  cx=float(cx_out),
305
  cy=float(cy_out),
306
  output_path=ply_path,
 
307
  )
308
 
309
  return DemoResult(
 
16
  resolve_camera_intrinsics,
17
  resolve_output_size_from_mode,
18
  )
19
+ from .io_utils import depth2pcd, depth_to_disparity
20
  from .model_utils import build_model
21
+ from .sampling_utils import make_2d_uniform_coord
22
  from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
23
 
24
 
25
  DEFAULT_MODEL_PATHS = {
26
+ "InfiniDepth": "checkpoints/depth/infinidepth.ckpt",
27
+ "InfiniDepth_DC": "checkpoints/depth/infinidepth_dc.ckpt",
28
  }
29
+ os.environ.setdefault("INFINIDEPTH_MOGE2_PRETRAINED", "checkpoints/depth/moge2.pt")
30
  DEFAULT_MOGE2_PRETRAINED = os.getenv("INFINIDEPTH_MOGE2_PRETRAINED", "Ruicheng/moge-2-vitl-normal")
31
 
32
 
 
145
  cx: float,
146
  cy: float,
147
  output_path: str,
148
+ max_points_preview: int,
149
  ) -> tuple[np.ndarray, np.ndarray]:
150
  ixt = np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=np.float32)
151
  pcd = depth2pcd(
 
166
  if not o3d.io.write_point_cloud(output_path, pcd):
167
  raise RuntimeError(f"Failed to save point cloud to {output_path}")
168
 
169
+ xyz = np.asarray(pcd.points)
170
+ rgb = np.asarray(pcd.colors)
171
+ if xyz.shape[0] > max_points_preview:
172
+ index = np.random.choice(xyz.shape[0], int(max_points_preview), replace=False)
173
+ xyz = xyz[index]
174
+ rgb = rgb[index]
175
+
176
+ return xyz, rgb
177
 
178
 
179
  def resolve_checkpoint_path(model_type: str) -> str:
 
242
  upsample_ratio=upsample_ratio,
243
  )
244
 
245
+ if model_type == "InfiniDepth_DC":
246
+ assert depth_path is not None and os.path.exists(depth_path), "InfiniDepth_DC requires a valid input depth map for depth completion. Please provide --input_depth_path."
247
+
248
  gt_depth, prompt_depth, gt_depth_mask, prompt_mask, depth_source_label = _resolve_depth_inputs(
249
  depth_path=depth_path,
250
  input_size=input_size,
251
  image=image,
252
  device=device,
253
  )
254
+ gt = depth_to_disparity(gt_depth)
255
+ prompt = depth_to_disparity(prompt_depth)
256
+ prompt_mask = prompt > 0
257
 
258
  ckpt_path = resolve_checkpoint_path(model_type)
259
  model_cache = model_cache or ModelCache()
 
263
  pred_depth, _ = model.inference(
264
  image=image,
265
  query_coord=query_2d_uniform_coord,
266
+ gt_depth=gt,
267
  gt_depth_mask=gt_depth_mask,
268
+ prompt_depth=prompt,
269
+ prompt_mask=prompt_mask,
270
  )
271
 
272
  pred_depthmap = pred_depth.permute(0, 2, 1).reshape(1, 1, h_out, w_out)
 
280
  cy_org=cy,
281
  org_h=org_h,
282
  org_w=org_w,
283
+ h=h_in,
284
+ w=w_in,
285
  device=device,
286
  )
287
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  output_dir = tempfile.mkdtemp(prefix="infinidepth_demo_")
289
  ply_path = os.path.join(output_dir, f"{model_type}_point_cloud.ply")
290
  depth_npy_path = os.path.join(output_dir, f"{model_type}_depth.npy")
291
  np.save(depth_npy_path, pred_depthmap[0, 0].detach().cpu().numpy().astype(np.float32))
292
 
293
  xyz, rgb = _build_and_save_point_cloud(
294
+ query_coord=query_2d_uniform_coord,
295
+ pred_depth=pred_depth,
296
+ rgb_image=image,
297
  fx=float(fx_out),
298
  fy=float(fy_out),
299
  cx=float(cx_out),
300
  cy=float(cy_out),
301
  output_path=ply_path,
302
+ max_points_preview=int(max_points_preview),
303
  )
304
 
305
  return DemoResult(
InfiniDepth/utils/io_utils.py CHANGED
@@ -377,6 +377,24 @@ def depth2pcd(
377
  ):
378
  device = sampled_coord.device if torch.is_tensor(sampled_coord) else torch.device("cpu")
379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  if rgb_image.ndim == 3 and rgb_image.shape[0] == 3:
381
  rgb_image = rgb_image.unsqueeze(0)
382
  elif rgb_image.ndim == 3 and rgb_image.shape[2] == 3:
@@ -398,7 +416,7 @@ def depth2pcd(
398
  p_y = ((sampled_coord[:, 0] + 1) * (rgb_image.shape[2] / 2.0)) - 0.5 # h --> y
399
  ones = torch.ones_like(p_x)
400
 
401
- cam_coords = torch.stack([p_x, p_y, ones], dim=1) * sampled_depth.unsqueeze(1)
402
  cam_coords = cam_coords @ torch.inverse(torch.from_numpy(ixt).float().to(device)).T
403
 
404
  if ext is not None:
@@ -420,9 +438,21 @@ def depth2pcd(
420
  if clip_box[4] is not None: mask &= cam_coords[:, 1] >= clip_box[4]
421
  if clip_box[5] is not None: mask &= cam_coords[:, 2] >= clip_box[5]
422
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  pcd = o3d.geometry.PointCloud()
424
- pcd.points = o3d.utility.Vector3dVector(cam_coords[mask, :3].cpu().numpy())
425
- pcd.colors = o3d.utility.Vector3dVector(sampled_rgb[mask].cpu().numpy())
426
 
427
  if ret_mask:
428
  return pcd, mask
 
377
  ):
378
  device = sampled_coord.device if torch.is_tensor(sampled_coord) else torch.device("cpu")
379
 
380
+ if not torch.is_tensor(sampled_coord):
381
+ sampled_coord = torch.as_tensor(sampled_coord, dtype=torch.float32, device=device)
382
+ else:
383
+ sampled_coord = sampled_coord.to(device=device, dtype=torch.float32)
384
+
385
+ if not torch.is_tensor(sampled_depth):
386
+ sampled_depth = torch.as_tensor(sampled_depth, dtype=torch.float32, device=device)
387
+ else:
388
+ sampled_depth = sampled_depth.to(device=device, dtype=torch.float32)
389
+
390
+ sampled_coord = sampled_coord.reshape(-1, 2)
391
+ sampled_depth = sampled_depth.reshape(-1)
392
+ if sampled_coord.shape[0] != sampled_depth.shape[0]:
393
+ raise ValueError(
394
+ f"sampled_coord and sampled_depth must contain the same number of points, "
395
+ f"got {sampled_coord.shape[0]} and {sampled_depth.shape[0]}"
396
+ )
397
+
398
  if rgb_image.ndim == 3 and rgb_image.shape[0] == 3:
399
  rgb_image = rgb_image.unsqueeze(0)
400
  elif rgb_image.ndim == 3 and rgb_image.shape[2] == 3:
 
416
  p_y = ((sampled_coord[:, 0] + 1) * (rgb_image.shape[2] / 2.0)) - 0.5 # h --> y
417
  ones = torch.ones_like(p_x)
418
 
419
+ cam_coords = torch.stack([p_x, p_y, ones], dim=1) * sampled_depth[:, None]
420
  cam_coords = cam_coords @ torch.inverse(torch.from_numpy(ixt).float().to(device)).T
421
 
422
  if ext is not None:
 
438
  if clip_box[4] is not None: mask &= cam_coords[:, 1] >= clip_box[4]
439
  if clip_box[5] is not None: mask &= cam_coords[:, 2] >= clip_box[5]
440
 
441
+ points_np = np.ascontiguousarray(
442
+ cam_coords[mask, :3].detach().cpu().to(torch.float64).numpy()
443
+ )
444
+ colors_np = np.ascontiguousarray(
445
+ sampled_rgb[mask].detach().cpu().to(torch.float64).numpy()
446
+ )
447
+
448
+ if points_np.ndim != 2 or points_np.shape[1] != 3:
449
+ raise ValueError(f"Point cloud points must have shape [N, 3], got {points_np.shape}")
450
+ if colors_np.ndim != 2 or colors_np.shape[1] != 3:
451
+ raise ValueError(f"Point cloud colors must have shape [N, 3], got {colors_np.shape}")
452
+
453
  pcd = o3d.geometry.PointCloud()
454
+ pcd.points = o3d.utility.Vector3dVector(points_np)
455
+ pcd.colors = o3d.utility.Vector3dVector(colors_np)
456
 
457
  if ret_mask:
458
  return pcd, mask
InfiniDepth/utils/sampling_utils.py CHANGED
@@ -49,7 +49,7 @@ def make_2d_uniform_coord(shape, ranges=None, flatten=True):
49
  r = (v1 - v0) / (2 * n)
50
  seq = v0 + r + (2 * r) * torch.arange(n).float()
51
  coord_seqs.append(seq)
52
- query_coords = torch.stack(torch.meshgrid(*coord_seqs), dim=-1)
53
  if flatten:
54
  query_coords = query_coords.view(-1, query_coords.shape[-1])
55
  return query_coords
 
49
  r = (v1 - v0) / (2 * n)
50
  seq = v0 + r + (2 * r) * torch.arange(n).float()
51
  coord_seqs.append(seq)
52
+ query_coords = torch.stack(torch.meshgrid(*coord_seqs, indexing="ij"), dim=-1)
53
  if flatten:
54
  query_coords = query_coords.view(-1, query_coords.shape[-1])
55
  return query_coords
__pycache__/app.cpython-310.pyc CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ
 
app.py CHANGED
@@ -66,6 +66,23 @@ CUSTOM_CSS = """
66
  """
67
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def _none_if_invalid(value: Optional[float]) -> Optional[float]:
70
  if value is None:
71
  return None
@@ -195,7 +212,7 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS)
195
  label="Model Type",
196
  )
197
  input_size = gr.Dropdown(
198
- choices=["504x672", "768x1024"],
199
  value="768x1024",
200
  label="Inference Resolution (HxW)",
201
  )
@@ -212,8 +229,8 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS)
212
  label="Super-resolution Ratio",
213
  )
214
  max_points_preview = gr.Slider(
215
- minimum=5000,
216
- maximum=120000,
217
  value=60000,
218
  step=5000,
219
  label="Max Preview Points",
@@ -275,4 +292,5 @@ demo = demo.queue()
275
 
276
 
277
  if __name__ == "__main__":
278
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
 
 
66
  """
67
 
68
 
69
+ def _ensure_localhost_bypasses_proxy() -> None:
70
+ localhost_hosts = ("localhost", "127.0.0.1", "::1")
71
+ for env_key in ("NO_PROXY", "no_proxy"):
72
+ current = os.environ.get(env_key, "")
73
+ items = [item.strip() for item in current.split(",") if item.strip()]
74
+ changed = False
75
+ for host in localhost_hosts:
76
+ if host not in items:
77
+ items.append(host)
78
+ changed = True
79
+ if changed or current:
80
+ os.environ[env_key] = ",".join(items)
81
+
82
+
83
+ _ensure_localhost_bypasses_proxy()
84
+
85
+
86
  def _none_if_invalid(value: Optional[float]) -> Optional[float]:
87
  if value is None:
88
  return None
 
212
  label="Model Type",
213
  )
214
  input_size = gr.Dropdown(
215
+ choices=["512x672", "768x1024"],
216
  value="768x1024",
217
  label="Inference Resolution (HxW)",
218
  )
 
229
  label="Super-resolution Ratio",
230
  )
231
  max_points_preview = gr.Slider(
232
+ minimum=10000,
233
+ maximum=1000000,
234
  value=60000,
235
  step=5000,
236
  label="Max Preview Points",
 
292
 
293
 
294
  if __name__ == "__main__":
295
+ server_name = "0.0.0.0" if os.getenv("SPACE_ID") else "127.0.0.1"
296
+ demo.launch(server_name=server_name, server_port=int(os.getenv("PORT", "7861")))
requirements.txt CHANGED
@@ -4,6 +4,7 @@
4
  torch==2.9.1
5
  torchvision==0.24.1
6
  torchaudio==2.9.1
 
7
  hydra-colorlog
8
  hydra-core
9
  h5py
 
4
  torch==2.9.1
5
  torchvision==0.24.1
6
  torchaudio==2.9.1
7
+ xformers==0.0.33.post1
8
  hydra-colorlog
9
  hydra-core
10
  h5py