Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,63 +1,149 @@
|
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
| 2 |
import torch
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
# ---------------------------------------------------------
|
| 9 |
-
# CPU-safe DepthAnything3 loader
|
| 10 |
-
# ---------------------------------------------------------
|
| 11 |
-
|
| 12 |
-
def load_model_cpu(model_dir):
|
| 13 |
-
print("🔄 Loading DepthAnything3 model on CPU...")
|
| 14 |
-
model = DepthAnything3.from_pretrained(model_dir, config_name="config.json")
|
| 15 |
-
model.to("cpu")
|
| 16 |
-
model.eval()
|
| 17 |
-
print("✅ Model ready on CPU")
|
| 18 |
-
return model
|
| 19 |
-
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
MODEL_DIR = os.environ.get("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")
|
| 22 |
-
model = load_model_cpu(MODEL_DIR)
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
# ---------------------------------------------------------
|
| 26 |
-
# CPU-safe inference (single image only)
|
| 27 |
-
# ---------------------------------------------------------
|
| 28 |
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
"""
|
| 31 |
-
|
| 32 |
-
|
| 33 |
"""
|
| 34 |
-
if
|
| 35 |
return None
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
#
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
-
|
|
|
|
| 55 |
fn=run_depth,
|
| 56 |
-
inputs=
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
| 58 |
title=title,
|
| 59 |
description=description,
|
| 60 |
)
|
| 61 |
|
|
|
|
| 62 |
if __name__ == "__main__":
|
| 63 |
-
|
|
|
|
| 1 |
+
# app.py (safe CPU startup for HF Spaces)
|
| 2 |
import os
|
| 3 |
+
import io
|
| 4 |
+
import numpy as np
|
| 5 |
import torch
|
| 6 |
+
from PIL import Image
|
| 7 |
import gradio as gr
|
| 8 |
|
| 9 |
+
# Import the CPU-patched class you added earlier
|
| 10 |
+
from depth_anything_3.api import DepthAnything3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# --- Configuration -------------------------------------------------
# Hub repo (or local checkpoint dir) to load; override with DA3_MODEL_DIR.
MODEL_DIR = os.environ.get("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")

# Working resolution for inference. Kept small so CPU runs stay feasible;
# raising it improves quality but is much slower. Override via env.
PROCESS_RES = int(os.environ.get("DA3_PROCESS_RES", "384"))
|
| 21 |
+
|
| 22 |
+
# ---------------------------
|
| 23 |
+
# Model loading (CPU)
|
| 24 |
+
# ---------------------------
|
| 25 |
+
print(f"🔄 Loading DepthAnything3 from '{MODEL_DIR}' on CPU (this may take a moment)...")
|
| 26 |
+
# Uses the PyTorchModelHubMixin.from_pretrained you have in the class
|
| 27 |
+
model = DepthAnything3.from_pretrained(MODEL_DIR)
|
| 28 |
+
model.to(torch.device("cpu"))
|
| 29 |
+
model.eval()
|
| 30 |
+
print("✅ Model ready on CPU")
|
| 31 |
+
|
| 32 |
+
# ---------------------------
|
| 33 |
+
# Inference helper
|
| 34 |
+
# ---------------------------
|
| 35 |
+
def _normalize_depth_to_uint8(depth: np.ndarray) -> np.ndarray:
|
| 36 |
+
"""Normalize a depth map (H,W) to uint8 grayscale for display."""
|
| 37 |
+
if depth is None:
|
| 38 |
+
return None
|
| 39 |
+
# convert to float
|
| 40 |
+
d = depth.astype(np.float32)
|
| 41 |
+
# clip NaNs / infs
|
| 42 |
+
d = np.nan_to_num(d, nan=0.0, posinf=0.0, neginf=0.0)
|
| 43 |
+
# Normalize robustly: use 1st and 99th percentiles to avoid outliers
|
| 44 |
+
vmin = np.percentile(d, 1.0)
|
| 45 |
+
vmax = np.percentile(d, 99.0)
|
| 46 |
+
if vmax - vmin < 1e-6:
|
| 47 |
+
vmax = vmin + 1.0
|
| 48 |
+
d = (d - vmin) / (vmax - vmin)
|
| 49 |
+
d = np.clip(d, 0.0, 1.0)
|
| 50 |
+
img = (d * 255.0).astype(np.uint8)
|
| 51 |
+
return img
|
| 52 |
+
|
| 53 |
+
def run_depth(single_img: Image.Image, process_res: int = PROCESS_RES):
    """Run single-image depth inference with the patched DepthAnything3 API.

    Args:
        single_img: Input photo as a PIL image; None (no upload) yields None.
        process_res: Working resolution passed to the model. Gradio's slider
            delivers a float, so it is coerced to int here.

    Returns:
        A grayscale ("L") PIL image visualizing depth, a white RGB fallback
        image when no depth could be extracted, or None for no input.
    """
    if single_img is None:
        return None

    # Gradio sliders emit floats; the model expects an integer resolution.
    process_res = int(process_res)

    try:
        # Single-image batch; keep other args minimal to avoid heavy work.
        pred = model.inference(
            [single_img],
            process_res=process_res,
            process_res_method="upper_bound_resize",
            export_format="mini_npz",  # minimal export
        )
    except Exception as e:
        msg = f"Inference error: {e}"
        print(msg)
        # Blank placeholder so the UI shows something instead of crashing;
        # the actual error text goes to the Space logs above.
        err_img = Image.new("RGB", (640, 120), color=(255, 255, 255))
        return err_img

    # Extract depth from the Prediction object — tolerate several layouts.
    depth_map = None
    if hasattr(pred, "depth"):
        depth_map = pred.depth
    elif isinstance(pred, dict) and "depth" in pred:
        depth_map = pred["depth"]
    elif hasattr(pred, "predictions") and len(pred.predictions) > 0:
        # Fallback: some wrappers store a list of per-image predictions.
        first = pred.predictions[0]
        depth_map = first.depth if hasattr(first, "depth") else None

    if depth_map is None:
        # Fallback: show the processed input image as a visual sanity check.
        try:
            if hasattr(pred, "processed_images"):
                imgs = pred.processed_images
                if isinstance(imgs, np.ndarray) and imgs.shape[0] > 0:
                    return Image.fromarray((imgs[0] * 255).astype(np.uint8))
        except Exception:
            pass
        # Nothing usable in the prediction.
        print("No depth found in prediction; returning empty image.")
        return Image.new("RGB", (640, 480), color=(255, 255, 255))

    # Unwrap list/tuple batch containers first.
    if isinstance(depth_map, (list, tuple)):
        depth_map = depth_map[0]
    # Convert torch tensors to numpy BEFORE the shape checks below — the
    # original checked np.ndarray shapes first, so a batched torch tensor
    # of shape (1, H, W) was never squeezed and broke Image.fromarray.
    if isinstance(depth_map, torch.Tensor):
        depth_map = depth_map.detach().cpu().numpy()
    depth_map = np.asarray(depth_map)
    # (1, H, W) batch of one -> (H, W).
    if depth_map.ndim == 3 and depth_map.shape[0] == 1:
        depth_map = depth_map[0]
    # (3, H, W) channel-first -> average channels down to one plane.
    if depth_map.ndim == 3 and depth_map.shape[0] == 3:
        depth_map = depth_map.mean(axis=0)

    depth_uint8 = _normalize_depth_to_uint8(depth_map)
    if depth_uint8 is None:
        return Image.new("RGB", (640, 480), color=(255, 255, 255))

    # Grayscale visualization for the Gradio output component.
    depth_img = Image.fromarray(depth_uint8, mode="L")
    return depth_img
|
| 125 |
+
|
| 126 |
+
# ---------------------------
# Gradio interface
# ---------------------------
title = "Depth Anything 3 — CPU (single-image)"
description = (
    "CPU-only minimal interface. Upload a single image and get a quick depth visualization.\n"
    "This Space is intentionally lightweight to allow CPU startup. For better quality/multiview features you need GPU or the full app."
)

# Make the Gradio Interface the top-level `app` variable so HF Spaces detects it
app = gr.Interface(
    fn=run_depth,
    # Inputs mirror run_depth's (single_img, process_res) signature.
    inputs=[
        gr.Image(type="pil", label="Upload image"),
        # NOTE(review): gr.Slider delivers a float value; confirm run_depth
        # tolerates a non-int process_res before relying on this control.
        gr.Slider(minimum=128, maximum=1024, step=64, value=PROCESS_RES, label="Process resolution (smaller = faster)")
    ],
    outputs=gr.Image(label="Predicted depth (grayscale)"),
    title=title,
    description=description,
)

# For local running (Spaces launches `app` itself; this guard is for dev use)
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860)
|