ritianyu committed on
Commit
16ea76a
·
1 Parent(s): 82fa3eb
InfiniDepth/model/model.py CHANGED
@@ -59,6 +59,7 @@ class _BaseInfiniDepthModel(nn.Module):
59
  self,
60
  model_path: Optional[str] = None,
61
  encoder: str = "vitl16",
 
62
  ):
63
  super().__init__()
64
  self.model_config = dinov3_model_configs[encoder]
@@ -96,8 +97,13 @@ class _BaseInfiniDepthModel(nn.Module):
96
  raise FileNotFoundError(f"Model file {model_path} not found")
97
 
98
  # only for inference
99
- if torch.cuda.is_available():
100
- self.cuda()
 
 
 
 
 
101
  self.eval()
102
 
103
  def _init_variant_modules(self):
 
59
  self,
60
  model_path: Optional[str] = None,
61
  encoder: str = "vitl16",
62
+ device: Optional[str | torch.device] = None,
63
  ):
64
  super().__init__()
65
  self.model_config = dinov3_model_configs[encoder]
 
97
  raise FileNotFoundError(f"Model file {model_path} not found")
98
 
99
  # only for inference
100
+ target_device = None
101
+ if device is not None:
102
+ target_device = torch.device(device)
103
+ elif torch.cuda.is_available():
104
+ target_device = torch.device("cuda")
105
+ if target_device is not None:
106
+ self.to(target_device)
107
  self.eval()
108
 
109
  def _init_variant_modules(self):
InfiniDepth/utils/hf_demo_utils.py CHANGED
@@ -19,6 +19,7 @@ from .inference_utils import (
19
  from .io_utils import depth2pcd, depth_to_disparity
20
  from .logger import Log
21
  from .model_utils import build_model
 
22
  from .sampling_utils import make_2d_uniform_coord
23
  from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
24
 
@@ -128,6 +129,23 @@ def prepare_runtime_assets() -> None:
128
  resolve_moge2_pretrained()
129
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  def _report_stage(stage_callback: Optional[Callable[[str], None]], stage: str) -> None:
132
  if stage_callback is not None:
133
  stage_callback(stage)
@@ -169,18 +187,43 @@ class ModelCache:
169
  def __init__(self):
170
  self._cache: dict[tuple[str, str], Any] = {}
171
 
172
- def get(self, model_type: str, model_path: str):
 
 
 
 
 
 
 
173
  key = (model_type, model_path)
174
  if key not in self._cache:
175
- Log.info(f"Loading model: model_type={model_type}, checkpoint={model_path}")
 
 
 
176
  self._cache[key] = build_model(
177
  model_type=model_type,
178
  model_path=model_path,
 
179
  )
180
  else:
181
  Log.info(f"Using cached model: model_type={model_type}, checkpoint={model_path}")
 
 
 
 
 
 
 
 
 
 
 
182
  return self._cache[key]
183
 
 
 
 
184
 
185
  def _parse_image_size(size_text: str) -> tuple[int, int]:
186
  try:
@@ -399,7 +442,7 @@ def run_single_image_demo(
399
  ckpt_path = resolve_checkpoint_path(model_type)
400
  _report_stage(stage_callback, f"demo:checkpoint_resolved path={ckpt_path}")
401
  model_cache = model_cache or ModelCache()
402
- model = model_cache.get(model_type=model_type, model_path=ckpt_path)
403
  _report_stage(stage_callback, "demo:model_loaded")
404
 
405
  query_2d_uniform_coord = make_2d_uniform_coord((h_out, w_out)).unsqueeze(0).to(device)
@@ -551,8 +594,9 @@ def run_gpu_inference(
551
  prompt_mask = prompt > 0
552
 
553
  ckpt_path = resolve_checkpoint_path(model_type)
 
554
  model_cache = model_cache or ModelCache()
555
- model = model_cache.get(model_type=model_type, model_path=ckpt_path)
556
  if _debug:
557
  torch.cuda.synchronize()
558
  Log.info(f"[GPU-DEBUG] model_loaded: GPU mem allocated={torch.cuda.memory_allocated(device) / 1e6:.1f}MB")
 
19
  from .io_utils import depth2pcd, depth_to_disparity
20
  from .logger import Log
21
  from .model_utils import build_model
22
+ from .moge_utils import preload_moge2_model
23
  from .sampling_utils import make_2d_uniform_coord
24
  from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
25
 
 
129
  resolve_moge2_pretrained()
130
 
131
 
132
def preload_space_runtime_models(
    model_cache: "ModelCache",
    default_model_type: str = "InfiniDepth",
) -> None:
    """Warm the model caches on CPU ahead of the first request.

    Does nothing unless the module-level ``SPACE_RUNTIME`` flag is truthy.
    Otherwise the default checkpoint is loaded through ``model_cache.preload``
    and the MoGe-2 model through ``preload_moge2_model``.

    Args:
        model_cache: Cache that will hold the preloaded depth model.
        default_model_type: Model variant whose checkpoint is resolved and
            preloaded.
    """
    if SPACE_RUNTIME:
        Log.info(
            f"Preloading default ZeroGPU runtime models on CPU: default_model_type={default_model_type}"
        )
        checkpoint_path = resolve_checkpoint_path(default_model_type)
        model_cache.preload(model_type=default_model_type, model_path=checkpoint_path)
        moge2_source = resolve_moge2_pretrained()
        preload_moge2_model(moge2_source)
147
+
148
+
149
  def _report_stage(stage_callback: Optional[Callable[[str], None]], stage: str) -> None:
150
  if stage_callback is not None:
151
  stage_callback(stage)
 
187
  def __init__(self):
188
  self._cache: dict[tuple[str, str], Any] = {}
189
 
190
+ @staticmethod
191
+ def _get_model_device(model: Any) -> torch.device:
192
+ try:
193
+ return next(model.parameters()).device
194
+ except StopIteration:
195
+ return torch.device("cpu")
196
+
197
def get(self, model_type: str, model_path: str, device: Optional[torch.device | str] = None):
    """Return the cached model for ``(model_type, model_path)``, loading it on first use.

    Args:
        model_type: Model variant name forwarded to ``build_model``.
        model_path: Checkpoint path forwarded to ``build_model``.
        device: Device the returned model must live on. When ``None`` a new
            model is loaded on CPU and an already cached model is returned
            wherever it currently lives.

    Returns:
        The cached model instance. ``eval()`` is re-applied whenever
        ``device`` is given.
    """
    key = (model_type, model_path)
    requested = torch.device(device) if device is not None else None

    if key not in self._cache:
        load_device = requested if requested is not None else torch.device("cpu")
        Log.info(
            f"Loading model: model_type={model_type}, checkpoint={model_path}, device={load_device}"
        )
        self._cache[key] = build_model(
            model_type=model_type,
            model_path=model_path,
            device=load_device,
        )
    else:
        Log.info(f"Using cached model: model_type={model_type}, checkpoint={model_path}")

    model = self._cache[key]
    if requested is not None:
        # An index-less device such as "cuda" never compares equal to the
        # "cuda:0" that parameters report. Resolve it the same way ``.to()``
        # would, so a model that is already in place is not logged as moved
        # and re-sent through ``.to()`` on every call.
        resolved = requested
        if resolved.index is None:
            resolved = torch.empty(0, device=requested).device

        current_device = self._get_model_device(model)
        if current_device != resolved:
            Log.info(
                f"Moving cached model to device: model_type={model_type}, "
                f"checkpoint={model_path}, {current_device} -> {requested}"
            )
            model = model.to(requested)
            self._cache[key] = model
        # Idempotent; keeps the cached model in inference mode.
        model.eval()
    return model
223
 
224
def preload(self, model_type: str, model_path: str) -> None:
    """Load the model into the cache on CPU; the loaded instance is not returned."""
    cpu = torch.device("cpu")
    self.get(model_type=model_type, model_path=model_path, device=cpu)
226
+
227
 
228
  def _parse_image_size(size_text: str) -> tuple[int, int]:
229
  try:
 
442
  ckpt_path = resolve_checkpoint_path(model_type)
443
  _report_stage(stage_callback, f"demo:checkpoint_resolved path={ckpt_path}")
444
  model_cache = model_cache or ModelCache()
445
+ model = model_cache.get(model_type=model_type, model_path=ckpt_path, device=device)
446
  _report_stage(stage_callback, "demo:model_loaded")
447
 
448
  query_2d_uniform_coord = make_2d_uniform_coord((h_out, w_out)).unsqueeze(0).to(device)
 
594
  prompt_mask = prompt > 0
595
 
596
  ckpt_path = resolve_checkpoint_path(model_type)
597
+ _report_stage(stage_callback, "gpu:loading_model")
598
  model_cache = model_cache or ModelCache()
599
+ model = model_cache.get(model_type=model_type, model_path=ckpt_path, device=device)
600
  if _debug:
601
  torch.cuda.synchronize()
602
  Log.info(f"[GPU-DEBUG] model_loaded: GPU mem allocated={torch.cuda.memory_allocated(device) / 1e6:.1f}MB")
InfiniDepth/utils/moge_utils.py CHANGED
@@ -4,29 +4,48 @@ from typing import Optional
4
 
5
  import torch
6
 
 
7
 
8
- _MOGE2_MODEL_CACHE: dict[tuple[str, str], torch.nn.Module] = {}
9
 
10
 
11
- def _get_moge2_model(pretrained_model_name_or_path: str, device: torch.device) -> torch.nn.Module:
12
- cache_key = (pretrained_model_name_or_path, str(device))
13
- if cache_key in _MOGE2_MODEL_CACHE:
14
- return _MOGE2_MODEL_CACHE[cache_key]
15
-
16
  try:
17
- from moge.model.v2 import MoGeModel
18
- except ImportError as exc:
19
- raise ImportError(
20
- "MoGe is not installed. Please install it first: "
21
- "`pip install git+https://github.com/microsoft/MoGe.git`"
22
- ) from exc
23
-
24
- model = MoGeModel.from_pretrained(pretrained_model_name_or_path).to(device)
25
- model.eval()
26
- _MOGE2_MODEL_CACHE[cache_key] = model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  return model
28
 
29
 
 
 
 
 
30
  def _squeeze_hw(tensor: torch.Tensor, name: str) -> torch.Tensor:
31
  if tensor.ndim == 2:
32
  return tensor
 
4
 
5
  import torch
6
 
7
+ from .logger import Log
8
 
9
+ _MOGE2_MODEL_CACHE: dict[str, torch.nn.Module] = {}
10
 
11
 
12
+ def _get_model_device(model: torch.nn.Module) -> torch.device:
 
 
 
 
13
  try:
14
+ return next(model.parameters()).device
15
+ except StopIteration:
16
+ return torch.device("cpu")
17
+
18
+
19
def _get_moge2_model(pretrained_model_name_or_path: str, device: torch.device) -> torch.nn.Module:
    """Return the cached MoGe-2 model, placed on ``device``.

    One model instance is cached per ``pretrained_model_name_or_path``. It is
    loaded without an explicit device on first use and moved to ``device``
    whenever it currently lives somewhere else.

    Args:
        pretrained_model_name_or_path: Identifier passed to
            ``MoGeModel.from_pretrained`` and used as the cache key.
        device: Device the returned model must live on.

    Returns:
        The cached model, in eval mode.

    Raises:
        ImportError: If the ``moge`` package is not installed.
    """
    model = _MOGE2_MODEL_CACHE.get(pretrained_model_name_or_path)
    if model is None:
        try:
            from moge.model.v2 import MoGeModel
        except ImportError as exc:
            raise ImportError(
                "MoGe is not installed. Please install it first: "
                "`pip install git+https://github.com/microsoft/MoGe.git`"
            ) from exc

        Log.info(f"Loading MoGe-2 model from {pretrained_model_name_or_path} on CPU")
        model = MoGeModel.from_pretrained(pretrained_model_name_or_path)
        model.eval()
        _MOGE2_MODEL_CACHE[pretrained_model_name_or_path] = model

    requested = torch.device(device)
    # An index-less device such as "cuda" never compares equal to the "cuda:0"
    # that parameters report. Resolve it the same way ``.to()`` would, so a
    # model that is already in place is not logged as moved on every call.
    resolved = requested
    if resolved.index is None:
        resolved = torch.empty(0, device=requested).device

    current_device = _get_model_device(model)
    if current_device != resolved:
        Log.info(f"Moving MoGe-2 model to device: {current_device} -> {requested}")
        model = model.to(requested)
        model.eval()
        _MOGE2_MODEL_CACHE[pretrained_model_name_or_path] = model
    return model
43
 
44
 
45
def preload_moge2_model(pretrained_model_name_or_path: str) -> None:
    """Load the MoGe-2 model into the module cache on CPU; nothing is returned."""
    cpu = torch.device("cpu")
    _get_moge2_model(pretrained_model_name_or_path, cpu)
47
+
48
+
49
  def _squeeze_hw(tensor: torch.Tensor, name: str) -> torch.Tensor:
50
  if tensor.ndim == 2:
51
  return tensor
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import shutil
3
  import tempfile
 
4
  import traceback
5
  import uuid
6
  from pathlib import Path
@@ -23,7 +24,13 @@ import gradio as gr
23
  import numpy as np
24
  from PIL import Image
25
 
26
- from InfiniDepth.utils.hf_demo_utils import ModelCache, prepare_runtime_assets, run_gpu_inference, postprocess_gpu_result
 
 
 
 
 
 
27
  from InfiniDepth.utils.logger import Log
28
 
29
  try:
@@ -33,6 +40,8 @@ except ImportError:
33
 
34
 
35
  MODEL_CACHE = ModelCache()
 
 
36
  OUTPUT_ROOT = Path(tempfile.gettempdir()) / "infinidepth_hf_demo"
37
  TRACE_ROOT = OUTPUT_ROOT / "trace"
38
  EXAMPLE_DATA_ROOT = Path(__file__).resolve().parent / "example_data"
@@ -207,7 +216,7 @@ def _append_trace(trace_path: str, stage: str) -> None:
207
  trace_file = Path(trace_path)
208
  trace_file.parent.mkdir(parents=True, exist_ok=True)
209
  with trace_file.open("a", encoding="utf-8") as handle:
210
- handle.write(f"{stage}\n")
211
  handle.flush()
212
 
213
 
@@ -396,6 +405,8 @@ def run_demo(
396
  if os.getenv("INFINIDEPTH_SHOW_TRACEBACK", "0") == "1":
397
  error_message = f"{error_message}\n\n{error_trace}"
398
  # Always log full traceback to server logs (visible in HF Space Logs tab)
 
 
399
  Log.error(f"[{request_id}] Full traceback:\n{error_trace}")
400
  return None, None, [], error_message
401
 
@@ -427,7 +438,7 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS,
427
  )
428
  input_size = gr.Dropdown(
429
  choices=["512x672", "768x1024"],
430
- value="768x1024",
431
  label="Inference Resolution (HxW)",
432
  )
433
  output_resolution_mode = gr.Dropdown(
@@ -461,7 +472,8 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS,
461
  gr.Markdown(
462
  "Tips: when a depth map is uploaded it will be used automatically, otherwise the demo falls back to MoGe-2. "
463
  "If camera intrinsics are missing, the demo first tries MoGe-2 estimates before image-size defaults. "
464
- "Use lower preview points for faster 3D interaction."
 
465
  )
466
 
467
  with gr.Column(elem_id="right-panel"):
@@ -522,7 +534,9 @@ demo = demo.queue()
522
 
523
  if __name__ == "__main__":
524
  prepare_runtime_assets()
525
- server_name = "0.0.0.0" if os.getenv("SPACE_ID") else "127.0.0.1"
 
 
526
  # Hugging Face Spaces with Gradio typically expects port 7860.
527
  # Respect explicit Gradio/PORT env overrides when provided.
528
  server_port = int(os.getenv("GRADIO_SERVER_PORT", os.getenv("PORT", "7860")))
@@ -530,7 +544,7 @@ if __name__ == "__main__":
530
  "server_name": server_name,
531
  "server_port": server_port,
532
  }
533
- if os.getenv("SPACE_ID"):
534
  launch_kwargs["ssr_mode"] = False
535
  if os.getenv("INFINIDEPTH_SHOW_ERROR", "0") == "1":
536
  launch_kwargs["show_error"] = True
 
1
  import os
2
  import shutil
3
  import tempfile
4
+ import time
5
  import traceback
6
  import uuid
7
  from pathlib import Path
 
24
  import numpy as np
25
  from PIL import Image
26
 
27
+ from InfiniDepth.utils.hf_demo_utils import (
28
+ ModelCache,
29
+ postprocess_gpu_result,
30
+ prepare_runtime_assets,
31
+ preload_space_runtime_models,
32
+ run_gpu_inference,
33
+ )
34
  from InfiniDepth.utils.logger import Log
35
 
36
  try:
 
40
 
41
 
42
  MODEL_CACHE = ModelCache()
43
+ SPACE_RUNTIME = bool(os.getenv("SPACE_ID"))
44
+ DEFAULT_INPUT_SIZE = "512x672" if SPACE_RUNTIME else "768x1024"
45
  OUTPUT_ROOT = Path(tempfile.gettempdir()) / "infinidepth_hf_demo"
46
  TRACE_ROOT = OUTPUT_ROOT / "trace"
47
  EXAMPLE_DATA_ROOT = Path(__file__).resolve().parent / "example_data"
 
216
  trace_file = Path(trace_path)
217
  trace_file.parent.mkdir(parents=True, exist_ok=True)
218
  with trace_file.open("a", encoding="utf-8") as handle:
219
+ handle.write(f"{time.strftime('%H:%M:%S')} {stage}\n")
220
  handle.flush()
221
 
222
 
 
405
  if os.getenv("INFINIDEPTH_SHOW_TRACEBACK", "0") == "1":
406
  error_message = f"{error_message}\n\n{error_trace}"
407
  # Always log full traceback to server logs (visible in HF Space Logs tab)
408
+ Log.error(f"[{request_id}] trace_path={trace_path}")
409
+ Log.error(f"[{request_id}] Last worker stages:\n{trace_summary or '<none>'}")
410
  Log.error(f"[{request_id}] Full traceback:\n{error_trace}")
411
  return None, None, [], error_message
412
 
 
438
  )
439
  input_size = gr.Dropdown(
440
  choices=["512x672", "768x1024"],
441
+ value=DEFAULT_INPUT_SIZE,
442
  label="Inference Resolution (HxW)",
443
  )
444
  output_resolution_mode = gr.Dropdown(
 
472
  gr.Markdown(
473
  "Tips: when a depth map is uploaded it will be used automatically, otherwise the demo falls back to MoGe-2. "
474
  "If camera intrinsics are missing, the demo first tries MoGe-2 estimates before image-size defaults. "
475
+ "Use lower preview points for faster 3D interaction. "
476
+ "On ZeroGPU, `512x672` is the safest default for cold starts."
477
  )
478
 
479
  with gr.Column(elem_id="right-panel"):
 
534
 
535
  if __name__ == "__main__":
536
  prepare_runtime_assets()
537
+ if os.getenv("INFINIDEPTH_PRELOAD_DEFAULT_MODELS", "1" if SPACE_RUNTIME else "0") == "1":
538
+ preload_space_runtime_models(model_cache=MODEL_CACHE, default_model_type="InfiniDepth")
539
+ server_name = "0.0.0.0" if SPACE_RUNTIME else "127.0.0.1"
540
  # Hugging Face Spaces with Gradio typically expects port 7860.
541
  # Respect explicit Gradio/PORT env overrides when provided.
542
  server_port = int(os.getenv("GRADIO_SERVER_PORT", os.getenv("PORT", "7860")))
 
544
  "server_name": server_name,
545
  "server_port": server_port,
546
  }
547
+ if SPACE_RUNTIME:
548
  launch_kwargs["ssr_mode"] = False
549
  if os.getenv("INFINIDEPTH_SHOW_ERROR", "0") == "1":
550
  launch_kwargs["show_error"] = True