Spaces:

ritianyu
/

InfiniDepth

Running on Zero

App Files Community

ritianyu committed 19 days ago

Commit

16ea76a

1 Parent(s): 82fa3eb

update

Browse files

Files changed (4) hide show

InfiniDepth/model/model.py +8 -2
InfiniDepth/utils/hf_demo_utils.py +48 -4
InfiniDepth/utils/moge_utils.py +35 -16
app.py +20 -6

InfiniDepth/model/model.py CHANGED Viewed

@@ -59,6 +59,7 @@ class _BaseInfiniDepthModel(nn.Module):
         self,
         model_path: Optional[str] = None,
         encoder: str = "vitl16",
     ):
         super().__init__()
         self.model_config = dinov3_model_configs[encoder]
@@ -96,8 +97,13 @@ class _BaseInfiniDepthModel(nn.Module):
                 raise FileNotFoundError(f"Model file {model_path} not found")
         # only for inference
-        if torch.cuda.is_available():
-            self.cuda()
         self.eval()
     def _init_variant_modules(self):

         self,
         model_path: Optional[str] = None,
         encoder: str = "vitl16",
+        device: Optional[str | torch.device] = None,
     ):
         super().__init__()
         self.model_config = dinov3_model_configs[encoder]
                 raise FileNotFoundError(f"Model file {model_path} not found")
         # only for inference
+        target_device = None
+        if device is not None:
+            target_device = torch.device(device)
+        elif torch.cuda.is_available():
+            target_device = torch.device("cuda")
+        if target_device is not None:
+            self.to(target_device)
         self.eval()
     def _init_variant_modules(self):

InfiniDepth/utils/hf_demo_utils.py CHANGED Viewed

@@ -19,6 +19,7 @@ from .inference_utils import (
 from .io_utils import depth2pcd, depth_to_disparity
 from .logger import Log
 from .model_utils import build_model
 from .sampling_utils import make_2d_uniform_coord
 from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
@@ -128,6 +129,23 @@ def prepare_runtime_assets() -> None:
     resolve_moge2_pretrained()
 def _report_stage(stage_callback: Optional[Callable[[str], None]], stage: str) -> None:
     if stage_callback is not None:
         stage_callback(stage)
@@ -169,18 +187,43 @@ class ModelCache:
     def __init__(self):
         self._cache: dict[tuple[str, str], Any] = {}
-    def get(self, model_type: str, model_path: str):
         key = (model_type, model_path)
         if key not in self._cache:
-            Log.info(f"Loading model: model_type={model_type}, checkpoint={model_path}")
             self._cache[key] = build_model(
                 model_type=model_type,
                 model_path=model_path,
             )
         else:
             Log.info(f"Using cached model: model_type={model_type}, checkpoint={model_path}")
         return self._cache[key]
 def _parse_image_size(size_text: str) -> tuple[int, int]:
     try:
@@ -399,7 +442,7 @@ def run_single_image_demo(
     ckpt_path = resolve_checkpoint_path(model_type)
     _report_stage(stage_callback, f"demo:checkpoint_resolved path={ckpt_path}")
     model_cache = model_cache or ModelCache()
-    model = model_cache.get(model_type=model_type, model_path=ckpt_path)
     _report_stage(stage_callback, "demo:model_loaded")
     query_2d_uniform_coord = make_2d_uniform_coord((h_out, w_out)).unsqueeze(0).to(device)
@@ -551,8 +594,9 @@ def run_gpu_inference(
     prompt_mask = prompt > 0
     ckpt_path = resolve_checkpoint_path(model_type)
     model_cache = model_cache or ModelCache()
-    model = model_cache.get(model_type=model_type, model_path=ckpt_path)
     if _debug:
         torch.cuda.synchronize()
         Log.info(f"[GPU-DEBUG] model_loaded: GPU mem allocated={torch.cuda.memory_allocated(device) / 1e6:.1f}MB")

 from .io_utils import depth2pcd, depth_to_disparity
 from .logger import Log
 from .model_utils import build_model
+from .moge_utils import preload_moge2_model
 from .sampling_utils import make_2d_uniform_coord
 from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
     resolve_moge2_pretrained()
+def preload_space_runtime_models(
+    model_cache: "ModelCache",
+    default_model_type: str = "InfiniDepth",
+) -> None:
+    if not SPACE_RUNTIME:
+        return
+    Log.info(
+        f"Preloading default ZeroGPU runtime models on CPU: default_model_type={default_model_type}"
+    )
+    model_cache.preload(
+        model_type=default_model_type,
+        model_path=resolve_checkpoint_path(default_model_type),
+    )
+    preload_moge2_model(resolve_moge2_pretrained())
 def _report_stage(stage_callback: Optional[Callable[[str], None]], stage: str) -> None:
     if stage_callback is not None:
         stage_callback(stage)
     def __init__(self):
         self._cache: dict[tuple[str, str], Any] = {}
+    @staticmethod
+    def _get_model_device(model: Any) -> torch.device:
+        try:
+            return next(model.parameters()).device
+        except StopIteration:
+            return torch.device("cpu")
+    def get(self, model_type: str, model_path: str, device: Optional[torch.device | str] = None):
         key = (model_type, model_path)
         if key not in self._cache:
+            load_device = torch.device(device) if device is not None else torch.device("cpu")
+            Log.info(
+                f"Loading model: model_type={model_type}, checkpoint={model_path}, device={load_device}"
+            )
             self._cache[key] = build_model(
                 model_type=model_type,
                 model_path=model_path,
+                device=load_device,
             )
         else:
             Log.info(f"Using cached model: model_type={model_type}, checkpoint={model_path}")
+        if device is not None:
+            target_device = torch.device(device)
+            current_device = self._get_model_device(self._cache[key])
+            if current_device != target_device:
+                Log.info(
+                    f"Moving cached model to device: model_type={model_type}, "
+                    f"checkpoint={model_path}, {current_device} -> {target_device}"
+                )
+                self._cache[key] = self._cache[key].to(target_device)
+                self._cache[key].eval()
         return self._cache[key]
+    def preload(self, model_type: str, model_path: str) -> None:
+        self.get(model_type=model_type, model_path=model_path, device=torch.device("cpu"))
 def _parse_image_size(size_text: str) -> tuple[int, int]:
     try:
     ckpt_path = resolve_checkpoint_path(model_type)
     _report_stage(stage_callback, f"demo:checkpoint_resolved path={ckpt_path}")
     model_cache = model_cache or ModelCache()
+    model = model_cache.get(model_type=model_type, model_path=ckpt_path, device=device)
     _report_stage(stage_callback, "demo:model_loaded")
     query_2d_uniform_coord = make_2d_uniform_coord((h_out, w_out)).unsqueeze(0).to(device)
     prompt_mask = prompt > 0
     ckpt_path = resolve_checkpoint_path(model_type)
+    _report_stage(stage_callback, "gpu:loading_model")
     model_cache = model_cache or ModelCache()
+    model = model_cache.get(model_type=model_type, model_path=ckpt_path, device=device)
     if _debug:
         torch.cuda.synchronize()
         Log.info(f"[GPU-DEBUG] model_loaded: GPU mem allocated={torch.cuda.memory_allocated(device) / 1e6:.1f}MB")

InfiniDepth/utils/moge_utils.py CHANGED Viewed

@@ -4,29 +4,48 @@ from typing import Optional
 import torch
-_MOGE2_MODEL_CACHE: dict[tuple[str, str], torch.nn.Module] = {}
-def _get_moge2_model(pretrained_model_name_or_path: str, device: torch.device) -> torch.nn.Module:
-    cache_key = (pretrained_model_name_or_path, str(device))
-    if cache_key in _MOGE2_MODEL_CACHE:
-        return _MOGE2_MODEL_CACHE[cache_key]
     try:
-        from moge.model.v2 import MoGeModel
-    except ImportError as exc:
-        raise ImportError(
-            "MoGe is not installed. Please install it first: "
-            "`pip install git+https://github.com/microsoft/MoGe.git`"
-        ) from exc
-    model = MoGeModel.from_pretrained(pretrained_model_name_or_path).to(device)
-    model.eval()
-    _MOGE2_MODEL_CACHE[cache_key] = model
     return model
 def _squeeze_hw(tensor: torch.Tensor, name: str) -> torch.Tensor:
     if tensor.ndim == 2:
         return tensor

 import torch
+from .logger import Log
+_MOGE2_MODEL_CACHE: dict[str, torch.nn.Module] = {}
+def _get_model_device(model: torch.nn.Module) -> torch.device:
     try:
+        return next(model.parameters()).device
+    except StopIteration:
+        return torch.device("cpu")
+def _get_moge2_model(pretrained_model_name_or_path: str, device: torch.device) -> torch.nn.Module:
+    model = _MOGE2_MODEL_CACHE.get(pretrained_model_name_or_path)
+    if model is None:
+        try:
+            from moge.model.v2 import MoGeModel
+        except ImportError as exc:
+            raise ImportError(
+                "MoGe is not installed. Please install it first: "
+                "`pip install git+https://github.com/microsoft/MoGe.git`"
+            ) from exc
+        Log.info(f"Loading MoGe-2 model from {pretrained_model_name_or_path} on CPU")
+        model = MoGeModel.from_pretrained(pretrained_model_name_or_path)
+        model.eval()
+        _MOGE2_MODEL_CACHE[pretrained_model_name_or_path] = model
+    current_device = _get_model_device(model)
+    target_device = torch.device(device)
+    if current_device != target_device:
+        Log.info(f"Moving MoGe-2 model to device: {current_device} -> {target_device}")
+        model = model.to(target_device)
+        model.eval()
+        _MOGE2_MODEL_CACHE[pretrained_model_name_or_path] = model
     return model
+def preload_moge2_model(pretrained_model_name_or_path: str) -> None:
+    _get_moge2_model(pretrained_model_name_or_path, torch.device("cpu"))
 def _squeeze_hw(tensor: torch.Tensor, name: str) -> torch.Tensor:
     if tensor.ndim == 2:
         return tensor

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import shutil
 import tempfile
 import traceback
 import uuid
 from pathlib import Path
@@ -23,7 +24,13 @@ import gradio as gr
 import numpy as np
 from PIL import Image
-from InfiniDepth.utils.hf_demo_utils import ModelCache, prepare_runtime_assets, run_gpu_inference, postprocess_gpu_result
 from InfiniDepth.utils.logger import Log
 try:
@@ -33,6 +40,8 @@ except ImportError:
 MODEL_CACHE = ModelCache()
 OUTPUT_ROOT = Path(tempfile.gettempdir()) / "infinidepth_hf_demo"
 TRACE_ROOT = OUTPUT_ROOT / "trace"
 EXAMPLE_DATA_ROOT = Path(__file__).resolve().parent / "example_data"
@@ -207,7 +216,7 @@ def _append_trace(trace_path: str, stage: str) -> None:
     trace_file = Path(trace_path)
     trace_file.parent.mkdir(parents=True, exist_ok=True)
     with trace_file.open("a", encoding="utf-8") as handle:
-        handle.write(f"{stage}\n")
         handle.flush()
@@ -396,6 +405,8 @@ def run_demo(
         if os.getenv("INFINIDEPTH_SHOW_TRACEBACK", "0") == "1":
             error_message = f"{error_message}\n\n{error_trace}"
         # Always log full traceback to server logs (visible in HF Space Logs tab)
         Log.error(f"[{request_id}] Full traceback:\n{error_trace}")
         return None, None, [], error_message
@@ -427,7 +438,7 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS,
                 )
                 input_size = gr.Dropdown(
                     choices=["512x672", "768x1024"],
-                    value="768x1024",
                     label="Inference Resolution (HxW)",
                 )
                 output_resolution_mode = gr.Dropdown(
@@ -461,7 +472,8 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS,
             gr.Markdown(
                 "Tips: when a depth map is uploaded it will be used automatically, otherwise the demo falls back to MoGe-2. "
                 "If camera intrinsics are missing, the demo first tries MoGe-2 estimates before image-size defaults. "
-                "Use lower preview points for faster 3D interaction."
             )
         with gr.Column(elem_id="right-panel"):
@@ -522,7 +534,9 @@ demo = demo.queue()
 if __name__ == "__main__":
     prepare_runtime_assets()
-    server_name = "0.0.0.0" if os.getenv("SPACE_ID") else "127.0.0.1"
     # Hugging Face Spaces with Gradio typically expects port 7860.
     # Respect explicit Gradio/PORT env overrides when provided.
     server_port = int(os.getenv("GRADIO_SERVER_PORT", os.getenv("PORT", "7860")))
@@ -530,7 +544,7 @@ if __name__ == "__main__":
         "server_name": server_name,
         "server_port": server_port,
     }
-    if os.getenv("SPACE_ID"):
         launch_kwargs["ssr_mode"] = False
     if os.getenv("INFINIDEPTH_SHOW_ERROR", "0") == "1":
         launch_kwargs["show_error"] = True

 import os
 import shutil
 import tempfile
+import time
 import traceback
 import uuid
 from pathlib import Path
 import numpy as np
 from PIL import Image
+from InfiniDepth.utils.hf_demo_utils import (
+    ModelCache,
+    postprocess_gpu_result,
+    prepare_runtime_assets,
+    preload_space_runtime_models,
+    run_gpu_inference,
+)
 from InfiniDepth.utils.logger import Log
 try:
 MODEL_CACHE = ModelCache()
+SPACE_RUNTIME = bool(os.getenv("SPACE_ID"))
+DEFAULT_INPUT_SIZE = "512x672" if SPACE_RUNTIME else "768x1024"
 OUTPUT_ROOT = Path(tempfile.gettempdir()) / "infinidepth_hf_demo"
 TRACE_ROOT = OUTPUT_ROOT / "trace"
 EXAMPLE_DATA_ROOT = Path(__file__).resolve().parent / "example_data"
     trace_file = Path(trace_path)
     trace_file.parent.mkdir(parents=True, exist_ok=True)
     with trace_file.open("a", encoding="utf-8") as handle:
+        handle.write(f"{time.strftime('%H:%M:%S')} {stage}\n")
         handle.flush()
         if os.getenv("INFINIDEPTH_SHOW_TRACEBACK", "0") == "1":
             error_message = f"{error_message}\n\n{error_trace}"
         # Always log full traceback to server logs (visible in HF Space Logs tab)
+        Log.error(f"[{request_id}] trace_path={trace_path}")
+        Log.error(f"[{request_id}] Last worker stages:\n{trace_summary or '<none>'}")
         Log.error(f"[{request_id}] Full traceback:\n{error_trace}")
         return None, None, [], error_message
                 )
                 input_size = gr.Dropdown(
                     choices=["512x672", "768x1024"],
+                    value=DEFAULT_INPUT_SIZE,
                     label="Inference Resolution (HxW)",
                 )
                 output_resolution_mode = gr.Dropdown(
             gr.Markdown(
                 "Tips: when a depth map is uploaded it will be used automatically, otherwise the demo falls back to MoGe-2. "
                 "If camera intrinsics are missing, the demo first tries MoGe-2 estimates before image-size defaults. "
+                "Use lower preview points for faster 3D interaction. "
+                "On ZeroGPU, `512x672` is the safest default for cold starts."
             )
         with gr.Column(elem_id="right-panel"):
 if __name__ == "__main__":
     prepare_runtime_assets()
+    if os.getenv("INFINIDEPTH_PRELOAD_DEFAULT_MODELS", "1" if SPACE_RUNTIME else "0") == "1":
+        preload_space_runtime_models(model_cache=MODEL_CACHE, default_model_type="InfiniDepth")
+    server_name = "0.0.0.0" if SPACE_RUNTIME else "127.0.0.1"
     # Hugging Face Spaces with Gradio typically expects port 7860.
     # Respect explicit Gradio/PORT env overrides when provided.
     server_port = int(os.getenv("GRADIO_SERVER_PORT", os.getenv("PORT", "7860")))
         "server_name": server_name,
         "server_port": server_port,
     }
+    if SPACE_RUNTIME:
         launch_kwargs["ssr_mode"] = False
     if os.getenv("INFINIDEPTH_SHOW_ERROR", "0") == "1":
         launch_kwargs["show_error"] = True