Spaces:
Running on Zero
Running on Zero
update
Browse files
- InfiniDepth/model/model.py +8 -2
- InfiniDepth/utils/hf_demo_utils.py +48 -4
- InfiniDepth/utils/moge_utils.py +35 -16
- app.py +20 -6
InfiniDepth/model/model.py
CHANGED
|
@@ -59,6 +59,7 @@ class _BaseInfiniDepthModel(nn.Module):
|
|
| 59 |
self,
|
| 60 |
model_path: Optional[str] = None,
|
| 61 |
encoder: str = "vitl16",
|
|
|
|
| 62 |
):
|
| 63 |
super().__init__()
|
| 64 |
self.model_config = dinov3_model_configs[encoder]
|
|
@@ -96,8 +97,13 @@ class _BaseInfiniDepthModel(nn.Module):
|
|
| 96 |
raise FileNotFoundError(f"Model file {model_path} not found")
|
| 97 |
|
| 98 |
# only for inference
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
self.eval()
|
| 102 |
|
| 103 |
def _init_variant_modules(self):
|
|
|
|
| 59 |
self,
|
| 60 |
model_path: Optional[str] = None,
|
| 61 |
encoder: str = "vitl16",
|
| 62 |
+
device: Optional[str | torch.device] = None,
|
| 63 |
):
|
| 64 |
super().__init__()
|
| 65 |
self.model_config = dinov3_model_configs[encoder]
|
|
|
|
| 97 |
raise FileNotFoundError(f"Model file {model_path} not found")
|
| 98 |
|
| 99 |
# only for inference
|
| 100 |
+
target_device = None
|
| 101 |
+
if device is not None:
|
| 102 |
+
target_device = torch.device(device)
|
| 103 |
+
elif torch.cuda.is_available():
|
| 104 |
+
target_device = torch.device("cuda")
|
| 105 |
+
if target_device is not None:
|
| 106 |
+
self.to(target_device)
|
| 107 |
self.eval()
|
| 108 |
|
| 109 |
def _init_variant_modules(self):
|
InfiniDepth/utils/hf_demo_utils.py
CHANGED
|
@@ -19,6 +19,7 @@ from .inference_utils import (
|
|
| 19 |
from .io_utils import depth2pcd, depth_to_disparity
|
| 20 |
from .logger import Log
|
| 21 |
from .model_utils import build_model
|
|
|
|
| 22 |
from .sampling_utils import make_2d_uniform_coord
|
| 23 |
from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
|
| 24 |
|
|
@@ -128,6 +129,23 @@ def prepare_runtime_assets() -> None:
|
|
| 128 |
resolve_moge2_pretrained()
|
| 129 |
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
def _report_stage(stage_callback: Optional[Callable[[str], None]], stage: str) -> None:
|
| 132 |
if stage_callback is not None:
|
| 133 |
stage_callback(stage)
|
|
@@ -169,18 +187,43 @@ class ModelCache:
|
|
| 169 |
def __init__(self):
|
| 170 |
self._cache: dict[tuple[str, str], Any] = {}
|
| 171 |
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
key = (model_type, model_path)
|
| 174 |
if key not in self._cache:
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
| 176 |
self._cache[key] = build_model(
|
| 177 |
model_type=model_type,
|
| 178 |
model_path=model_path,
|
|
|
|
| 179 |
)
|
| 180 |
else:
|
| 181 |
Log.info(f"Using cached model: model_type={model_type}, checkpoint={model_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
return self._cache[key]
|
| 183 |
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
def _parse_image_size(size_text: str) -> tuple[int, int]:
|
| 186 |
try:
|
|
@@ -399,7 +442,7 @@ def run_single_image_demo(
|
|
| 399 |
ckpt_path = resolve_checkpoint_path(model_type)
|
| 400 |
_report_stage(stage_callback, f"demo:checkpoint_resolved path={ckpt_path}")
|
| 401 |
model_cache = model_cache or ModelCache()
|
| 402 |
-
model = model_cache.get(model_type=model_type, model_path=ckpt_path)
|
| 403 |
_report_stage(stage_callback, "demo:model_loaded")
|
| 404 |
|
| 405 |
query_2d_uniform_coord = make_2d_uniform_coord((h_out, w_out)).unsqueeze(0).to(device)
|
|
@@ -551,8 +594,9 @@ def run_gpu_inference(
|
|
| 551 |
prompt_mask = prompt > 0
|
| 552 |
|
| 553 |
ckpt_path = resolve_checkpoint_path(model_type)
|
|
|
|
| 554 |
model_cache = model_cache or ModelCache()
|
| 555 |
-
model = model_cache.get(model_type=model_type, model_path=ckpt_path)
|
| 556 |
if _debug:
|
| 557 |
torch.cuda.synchronize()
|
| 558 |
Log.info(f"[GPU-DEBUG] model_loaded: GPU mem allocated={torch.cuda.memory_allocated(device) / 1e6:.1f}MB")
|
|
|
|
| 19 |
from .io_utils import depth2pcd, depth_to_disparity
|
| 20 |
from .logger import Log
|
| 21 |
from .model_utils import build_model
|
| 22 |
+
from .moge_utils import preload_moge2_model
|
| 23 |
from .sampling_utils import make_2d_uniform_coord
|
| 24 |
from .vis_utils import clip_outliers_by_percentile, colorize_depth_maps
|
| 25 |
|
|
|
|
| 129 |
resolve_moge2_pretrained()
|
| 130 |
|
| 131 |
|
| 132 |
+
def preload_space_runtime_models(
    model_cache: "ModelCache",
    default_model_type: str = "InfiniDepth",
) -> None:
    """Warm up the default models ahead of the first request on a Space.

    Nothing happens unless the module-level ``SPACE_RUNTIME`` flag is truthy.
    Otherwise the checkpoint for ``default_model_type`` is resolved and handed
    to ``model_cache.preload``, and the MoGe-2 weights are preloaded through
    ``preload_moge2_model``.

    Args:
        model_cache: Cache that will own the preloaded depth model.
        default_model_type: Model variant whose checkpoint is preloaded.
    """
    if SPACE_RUNTIME:
        Log.info(
            f"Preloading default ZeroGPU runtime models on CPU: default_model_type={default_model_type}"
        )
        checkpoint_path = resolve_checkpoint_path(default_model_type)
        model_cache.preload(model_type=default_model_type, model_path=checkpoint_path)
        moge2_source = resolve_moge2_pretrained()
        preload_moge2_model(moge2_source)
|
| 147 |
+
|
| 148 |
+
|
| 149 |
def _report_stage(stage_callback: Optional[Callable[[str], None]], stage: str) -> None:
|
| 150 |
if stage_callback is not None:
|
| 151 |
stage_callback(stage)
|
|
|
|
| 187 |
def __init__(self):
|
| 188 |
self._cache: dict[tuple[str, str], Any] = {}
|
| 189 |
|
| 190 |
+
@staticmethod
|
| 191 |
+
def _get_model_device(model: Any) -> torch.device:
|
| 192 |
+
try:
|
| 193 |
+
return next(model.parameters()).device
|
| 194 |
+
except StopIteration:
|
| 195 |
+
return torch.device("cpu")
|
| 196 |
+
|
| 197 |
+
def get(self, model_type: str, model_path: str, device: Optional[torch.device | str] = None):
    """Return the cached model for ``(model_type, model_path)``, building it on a miss.

    On a cache miss the model is built via ``build_model`` directly on
    ``device`` (CPU when ``device`` is ``None``). When ``device`` is given and
    differs from the model's current device, the model is moved there, put in
    eval mode, and the cache entry is updated.

    Args:
        model_type: Model variant name; first half of the cache key.
        model_path: Checkpoint path; second half of the cache key.
        device: Optional device the returned model must be on. ``None`` leaves
            a cached model wherever it currently is.

    Returns:
        The cached model instance.
    """
    cache_key = (model_type, model_path)

    if cache_key in self._cache:
        Log.info(f"Using cached model: model_type={model_type}, checkpoint={model_path}")
    else:
        load_device = torch.device("cpu") if device is None else torch.device(device)
        Log.info(
            f"Loading model: model_type={model_type}, checkpoint={model_path}, device={load_device}"
        )
        self._cache[cache_key] = build_model(
            model_type=model_type,
            model_path=model_path,
            device=load_device,
        )

    if device is None:
        return self._cache[cache_key]

    target_device = torch.device(device)
    current_device = self._get_model_device(self._cache[cache_key])
    # NOTE(review): an index-less device such as "cuda" may compare unequal to
    # "cuda:0", which would log and re-run ``.to()`` on every call — confirm.
    if current_device != target_device:
        Log.info(
            f"Moving cached model to device: model_type={model_type}, "
            f"checkpoint={model_path}, {current_device} -> {target_device}"
        )
        relocated = self._cache[cache_key].to(target_device)
        self._cache[cache_key] = relocated
        relocated.eval()
    return self._cache[cache_key]
|
| 223 |
|
| 224 |
+
def preload(self, model_type: str, model_path: str) -> None:
    """Populate the cache entry for ``(model_type, model_path)`` on CPU.

    Delegates to ``get`` with a CPU device and discards the returned model.
    """
    cpu_device = torch.device("cpu")
    self.get(model_type=model_type, model_path=model_path, device=cpu_device)
|
| 226 |
+
|
| 227 |
|
| 228 |
def _parse_image_size(size_text: str) -> tuple[int, int]:
|
| 229 |
try:
|
|
|
|
| 442 |
ckpt_path = resolve_checkpoint_path(model_type)
|
| 443 |
_report_stage(stage_callback, f"demo:checkpoint_resolved path={ckpt_path}")
|
| 444 |
model_cache = model_cache or ModelCache()
|
| 445 |
+
model = model_cache.get(model_type=model_type, model_path=ckpt_path, device=device)
|
| 446 |
_report_stage(stage_callback, "demo:model_loaded")
|
| 447 |
|
| 448 |
query_2d_uniform_coord = make_2d_uniform_coord((h_out, w_out)).unsqueeze(0).to(device)
|
|
|
|
| 594 |
prompt_mask = prompt > 0
|
| 595 |
|
| 596 |
ckpt_path = resolve_checkpoint_path(model_type)
|
| 597 |
+
_report_stage(stage_callback, "gpu:loading_model")
|
| 598 |
model_cache = model_cache or ModelCache()
|
| 599 |
+
model = model_cache.get(model_type=model_type, model_path=ckpt_path, device=device)
|
| 600 |
if _debug:
|
| 601 |
torch.cuda.synchronize()
|
| 602 |
Log.info(f"[GPU-DEBUG] model_loaded: GPU mem allocated={torch.cuda.memory_allocated(device) / 1e6:.1f}MB")
|
InfiniDepth/utils/moge_utils.py
CHANGED
|
@@ -4,29 +4,48 @@ from typing import Optional
|
|
| 4 |
|
| 5 |
import torch
|
| 6 |
|
|
|
|
| 7 |
|
| 8 |
-
_MOGE2_MODEL_CACHE: dict[
|
| 9 |
|
| 10 |
|
| 11 |
-
def
|
| 12 |
-
cache_key = (pretrained_model_name_or_path, str(device))
|
| 13 |
-
if cache_key in _MOGE2_MODEL_CACHE:
|
| 14 |
-
return _MOGE2_MODEL_CACHE[cache_key]
|
| 15 |
-
|
| 16 |
try:
|
| 17 |
-
|
| 18 |
-
except
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
model
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
return model
|
| 28 |
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
def _squeeze_hw(tensor: torch.Tensor, name: str) -> torch.Tensor:
|
| 31 |
if tensor.ndim == 2:
|
| 32 |
return tensor
|
|
|
|
| 4 |
|
| 5 |
import torch
|
| 6 |
|
| 7 |
+
from .logger import Log
|
| 8 |
|
| 9 |
+
_MOGE2_MODEL_CACHE: dict[str, torch.nn.Module] = {}
|
| 10 |
|
| 11 |
|
| 12 |
+
def _get_model_device(model: torch.nn.Module) -> torch.device:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
try:
|
| 14 |
+
return next(model.parameters()).device
|
| 15 |
+
except StopIteration:
|
| 16 |
+
return torch.device("cpu")
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _get_moge2_model(pretrained_model_name_or_path: str, device: torch.device) -> torch.nn.Module:
    """Return the cached MoGe-2 model for the given weights, placed on ``device``.

    The model is loaded once per ``pretrained_model_name_or_path`` with
    ``MoGeModel.from_pretrained`` and kept in ``_MOGE2_MODEL_CACHE``. On every
    call the cached instance is moved to ``device`` if it is not already
    there, and the cache entry is refreshed with the moved model.

    Args:
        pretrained_model_name_or_path: Identifier passed to ``from_pretrained``;
            also the cache key.
        device: Device the returned model must be on.

    Returns:
        The MoGe-2 model in eval mode.

    Raises:
        ImportError: If the ``moge`` package cannot be imported.
    """
    cached_model = _MOGE2_MODEL_CACHE.get(pretrained_model_name_or_path)

    if cached_model is None:
        # Imported lazily so the dependency is only needed when MoGe-2 is used.
        try:
            from moge.model.v2 import MoGeModel
        except ImportError as exc:
            raise ImportError(
                "MoGe is not installed. Please install it first: "
                "`pip install git+https://github.com/microsoft/MoGe.git`"
            ) from exc

        Log.info(f"Loading MoGe-2 model from {pretrained_model_name_or_path} on CPU")
        cached_model = MoGeModel.from_pretrained(pretrained_model_name_or_path)
        cached_model.eval()
        _MOGE2_MODEL_CACHE[pretrained_model_name_or_path] = cached_model

    current_device = _get_model_device(cached_model)
    target_device = torch.device(device)
    if current_device == target_device:
        return cached_model

    Log.info(f"Moving MoGe-2 model to device: {current_device} -> {target_device}")
    cached_model = cached_model.to(target_device)
    cached_model.eval()
    _MOGE2_MODEL_CACHE[pretrained_model_name_or_path] = cached_model
    return cached_model
|
| 43 |
|
| 44 |
|
| 45 |
+
def preload_moge2_model(pretrained_model_name_or_path: str) -> None:
    """Make sure the MoGe-2 model for the given weights is cached on CPU.

    Delegates to ``_get_moge2_model`` with a CPU device and discards the result.
    """
    cpu_device = torch.device("cpu")
    _get_moge2_model(pretrained_model_name_or_path, cpu_device)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
def _squeeze_hw(tensor: torch.Tensor, name: str) -> torch.Tensor:
|
| 50 |
if tensor.ndim == 2:
|
| 51 |
return tensor
|
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import shutil
|
| 3 |
import tempfile
|
|
|
|
| 4 |
import traceback
|
| 5 |
import uuid
|
| 6 |
from pathlib import Path
|
|
@@ -23,7 +24,13 @@ import gradio as gr
|
|
| 23 |
import numpy as np
|
| 24 |
from PIL import Image
|
| 25 |
|
| 26 |
-
from InfiniDepth.utils.hf_demo_utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
from InfiniDepth.utils.logger import Log
|
| 28 |
|
| 29 |
try:
|
|
@@ -33,6 +40,8 @@ except ImportError:
|
|
| 33 |
|
| 34 |
|
| 35 |
MODEL_CACHE = ModelCache()
|
|
|
|
|
|
|
| 36 |
OUTPUT_ROOT = Path(tempfile.gettempdir()) / "infinidepth_hf_demo"
|
| 37 |
TRACE_ROOT = OUTPUT_ROOT / "trace"
|
| 38 |
EXAMPLE_DATA_ROOT = Path(__file__).resolve().parent / "example_data"
|
|
@@ -207,7 +216,7 @@ def _append_trace(trace_path: str, stage: str) -> None:
|
|
| 207 |
trace_file = Path(trace_path)
|
| 208 |
trace_file.parent.mkdir(parents=True, exist_ok=True)
|
| 209 |
with trace_file.open("a", encoding="utf-8") as handle:
|
| 210 |
-
handle.write(f"{stage}\n")
|
| 211 |
handle.flush()
|
| 212 |
|
| 213 |
|
|
@@ -396,6 +405,8 @@ def run_demo(
|
|
| 396 |
if os.getenv("INFINIDEPTH_SHOW_TRACEBACK", "0") == "1":
|
| 397 |
error_message = f"{error_message}\n\n{error_trace}"
|
| 398 |
# Always log full traceback to server logs (visible in HF Space Logs tab)
|
|
|
|
|
|
|
| 399 |
Log.error(f"[{request_id}] Full traceback:\n{error_trace}")
|
| 400 |
return None, None, [], error_message
|
| 401 |
|
|
@@ -427,7 +438,7 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS,
|
|
| 427 |
)
|
| 428 |
input_size = gr.Dropdown(
|
| 429 |
choices=["512x672", "768x1024"],
|
| 430 |
-
value=
|
| 431 |
label="Inference Resolution (HxW)",
|
| 432 |
)
|
| 433 |
output_resolution_mode = gr.Dropdown(
|
|
@@ -461,7 +472,8 @@ with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS,
|
|
| 461 |
gr.Markdown(
|
| 462 |
"Tips: when a depth map is uploaded it will be used automatically, otherwise the demo falls back to MoGe-2. "
|
| 463 |
"If camera intrinsics are missing, the demo first tries MoGe-2 estimates before image-size defaults. "
|
| 464 |
-
"Use lower preview points for faster 3D interaction."
|
|
|
|
| 465 |
)
|
| 466 |
|
| 467 |
with gr.Column(elem_id="right-panel"):
|
|
@@ -522,7 +534,9 @@ demo = demo.queue()
|
|
| 522 |
|
| 523 |
if __name__ == "__main__":
|
| 524 |
prepare_runtime_assets()
|
| 525 |
-
|
|
|
|
|
|
|
| 526 |
# Hugging Face Spaces with Gradio typically expects port 7860.
|
| 527 |
# Respect explicit Gradio/PORT env overrides when provided.
|
| 528 |
server_port = int(os.getenv("GRADIO_SERVER_PORT", os.getenv("PORT", "7860")))
|
|
@@ -530,7 +544,7 @@ if __name__ == "__main__":
|
|
| 530 |
"server_name": server_name,
|
| 531 |
"server_port": server_port,
|
| 532 |
}
|
| 533 |
-
if
|
| 534 |
launch_kwargs["ssr_mode"] = False
|
| 535 |
if os.getenv("INFINIDEPTH_SHOW_ERROR", "0") == "1":
|
| 536 |
launch_kwargs["show_error"] = True
|
|
|
|
| 1 |
import os
|
| 2 |
import shutil
|
| 3 |
import tempfile
|
| 4 |
+
import time
|
| 5 |
import traceback
|
| 6 |
import uuid
|
| 7 |
from pathlib import Path
|
|
|
|
| 24 |
import numpy as np
|
| 25 |
from PIL import Image
|
| 26 |
|
| 27 |
+
from InfiniDepth.utils.hf_demo_utils import (
|
| 28 |
+
ModelCache,
|
| 29 |
+
postprocess_gpu_result,
|
| 30 |
+
prepare_runtime_assets,
|
| 31 |
+
preload_space_runtime_models,
|
| 32 |
+
run_gpu_inference,
|
| 33 |
+
)
|
| 34 |
from InfiniDepth.utils.logger import Log
|
| 35 |
|
| 36 |
try:
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
MODEL_CACHE = ModelCache()
|
| 43 |
+
SPACE_RUNTIME = bool(os.getenv("SPACE_ID"))
|
| 44 |
+
DEFAULT_INPUT_SIZE = "512x672" if SPACE_RUNTIME else "768x1024"
|
| 45 |
OUTPUT_ROOT = Path(tempfile.gettempdir()) / "infinidepth_hf_demo"
|
| 46 |
TRACE_ROOT = OUTPUT_ROOT / "trace"
|
| 47 |
EXAMPLE_DATA_ROOT = Path(__file__).resolve().parent / "example_data"
|
|
|
|
| 216 |
trace_file = Path(trace_path)
|
| 217 |
trace_file.parent.mkdir(parents=True, exist_ok=True)
|
| 218 |
with trace_file.open("a", encoding="utf-8") as handle:
|
| 219 |
+
handle.write(f"{time.strftime('%H:%M:%S')} {stage}\n")
|
| 220 |
handle.flush()
|
| 221 |
|
| 222 |
|
|
|
|
| 405 |
if os.getenv("INFINIDEPTH_SHOW_TRACEBACK", "0") == "1":
|
| 406 |
error_message = f"{error_message}\n\n{error_trace}"
|
| 407 |
# Always log full traceback to server logs (visible in HF Space Logs tab)
|
| 408 |
+
Log.error(f"[{request_id}] trace_path={trace_path}")
|
| 409 |
+
Log.error(f"[{request_id}] Last worker stages:\n{trace_summary or '<none>'}")
|
| 410 |
Log.error(f"[{request_id}] Full traceback:\n{error_trace}")
|
| 411 |
return None, None, [], error_message
|
| 412 |
|
|
|
|
| 438 |
)
|
| 439 |
input_size = gr.Dropdown(
|
| 440 |
choices=["512x672", "768x1024"],
|
| 441 |
+
value=DEFAULT_INPUT_SIZE,
|
| 442 |
label="Inference Resolution (HxW)",
|
| 443 |
)
|
| 444 |
output_resolution_mode = gr.Dropdown(
|
|
|
|
| 472 |
gr.Markdown(
|
| 473 |
"Tips: when a depth map is uploaded it will be used automatically, otherwise the demo falls back to MoGe-2. "
|
| 474 |
"If camera intrinsics are missing, the demo first tries MoGe-2 estimates before image-size defaults. "
|
| 475 |
+
"Use lower preview points for faster 3D interaction. "
|
| 476 |
+
"On ZeroGPU, `512x672` is the safest default for cold starts."
|
| 477 |
)
|
| 478 |
|
| 479 |
with gr.Column(elem_id="right-panel"):
|
|
|
|
| 534 |
|
| 535 |
if __name__ == "__main__":
|
| 536 |
prepare_runtime_assets()
|
| 537 |
+
if os.getenv("INFINIDEPTH_PRELOAD_DEFAULT_MODELS", "1" if SPACE_RUNTIME else "0") == "1":
|
| 538 |
+
preload_space_runtime_models(model_cache=MODEL_CACHE, default_model_type="InfiniDepth")
|
| 539 |
+
server_name = "0.0.0.0" if SPACE_RUNTIME else "127.0.0.1"
|
| 540 |
# Hugging Face Spaces with Gradio typically expects port 7860.
|
| 541 |
# Respect explicit Gradio/PORT env overrides when provided.
|
| 542 |
server_port = int(os.getenv("GRADIO_SERVER_PORT", os.getenv("PORT", "7860")))
|
|
|
|
| 544 |
"server_name": server_name,
|
| 545 |
"server_port": server_port,
|
| 546 |
}
|
| 547 |
+
if SPACE_RUNTIME:
|
| 548 |
launch_kwargs["ssr_mode"] = False
|
| 549 |
if os.getenv("INFINIDEPTH_SHOW_ERROR", "0") == "1":
|
| 550 |
launch_kwargs["show_error"] = True
|